[llvm] [llubi] Add support for load/store/lifetime markers (PR #182532)

Yingwei Zheng via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 6 09:56:03 PST 2026


https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/182532

>From ac3c7c26851441920c3822403efa0a57b97d294a Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Fri, 20 Feb 2026 02:46:23 +0800
Subject: [PATCH 1/6] [llubi] Add support for load/store

---
 llvm/docs/CommandGuide/llubi.rst              |  12 +
 llvm/test/tools/llubi/loadstore_be.ll         | 153 +++++++++
 llvm/test/tools/llubi/loadstore_le.ll         | 155 +++++++++
 llvm/test/tools/llubi/loadstore_misaligned.ll |  14 +
 llvm/test/tools/llubi/loadstore_null.ll       |  10 +
 llvm/test/tools/llubi/loadstore_oob1.ll       |  14 +
 llvm/test/tools/llubi/loadstore_poison.ll     |  10 +
 llvm/test/tools/llubi/loadstore_uaf.ll        |  21 ++
 llvm/test/tools/llubi/store_dead.ll           |  18 +
 llvm/tools/llubi/lib/Context.cpp              | 309 ++++++++++++++++--
 llvm/tools/llubi/lib/Context.h                |  36 +-
 llvm/tools/llubi/lib/Interpreter.cpp          | 127 ++++++-
 llvm/tools/llubi/lib/Value.h                  |  63 +++-
 llvm/tools/llubi/llubi.cpp                    |  23 +-
 14 files changed, 904 insertions(+), 61 deletions(-)
 create mode 100644 llvm/test/tools/llubi/loadstore_be.ll
 create mode 100644 llvm/test/tools/llubi/loadstore_le.ll
 create mode 100644 llvm/test/tools/llubi/loadstore_misaligned.ll
 create mode 100644 llvm/test/tools/llubi/loadstore_null.ll
 create mode 100644 llvm/test/tools/llubi/loadstore_oob1.ll
 create mode 100644 llvm/test/tools/llubi/loadstore_poison.ll
 create mode 100644 llvm/test/tools/llubi/loadstore_uaf.ll
 create mode 100644 llvm/test/tools/llubi/store_dead.ll

diff --git a/llvm/docs/CommandGuide/llubi.rst b/llvm/docs/CommandGuide/llubi.rst
index f652af83d810a..55528373cffef 100644
--- a/llvm/docs/CommandGuide/llubi.rst
+++ b/llvm/docs/CommandGuide/llubi.rst
@@ -70,6 +70,18 @@ INTERPRETER OPTIONS
 
   Set the value of `llvm.vscale` to N. The default value is 4.
 
+.. option:: -seed=N
+
+  Set the seed for the random number generator to N. By default, the seed is 0.
+
+.. option:: -undef-behavior=mode
+
+  Set the behavior for undefined values (e.g., load from uninitialized memory or freeze a poison value).
+  The options for `mode` are:
+
+  * `nondet`: Each load from the same uninitialized byte yields a freshly random value. This is the default behavior.
+  * `zero`: Uninitialized values are treated as zero.
+
 EXIT STATUS
 -----------
 
diff --git a/llvm/test/tools/llubi/loadstore_be.ll b/llvm/test/tools/llubi/loadstore_be.ll
new file mode 100644
index 0000000000000..0b78536cd808f
--- /dev/null
+++ b/llvm/test/tools/llubi/loadstore_be.ll
@@ -0,0 +1,153 @@
+; NOTE: Assertions have been autogenerated by utils/update_llubi_test_checks.py UTC_ARGS: --version 6
+; RUN: llubi --verbose < %s 2>&1 | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64"
+
+%struct = type { [2 x i16], i64 }
+%struct.packed = type <{ [2 x i16], i64 }>
+%struct.vscale = type { <vscale x 1 x i32>, <vscale x 1 x i32> }
+
+define void @main() {
+  %alloc = alloca i32
+  store i32 u0x00010203, ptr %alloc
+  %val1 = load i32, ptr %alloc
+  %val2 = load i32, ptr %alloc, align 2
+  %gep = getelementptr i8, ptr %alloc, i64 1
+  %val3 = load i8, ptr %gep
+  %val4 = load <4 x i8>, ptr %alloc
+
+  store i16 u0x0405, ptr %gep, align 1
+  %val5 = load <4 x i8>, ptr %alloc
+  store <2 x i16> <i16 u0x0607, i16 u0x0809>, ptr %alloc
+  %val6 = load <4 x i8>, ptr %alloc
+  %val7 = load <8 x i4>, ptr %alloc
+  store <3 x i3> <i3 1, i3 2, i3 3>, ptr %alloc
+  ; padding bits are undefined.
+  %val8 = load <16 x i1>, ptr %alloc
+  %val9 = load <16 x i1>, ptr %alloc
+  store <8 x i3> <i3 0, i3 1, i3 2, i3 3, i3 4, i3 5, i3 6, i3 7>, ptr %alloc
+  %val_bitcast = load <3 x i8>, ptr %alloc
+
+  ; For non-byte-sized types, the padding bits must be zero.
+  store i25 -1, ptr %alloc
+  %val10 = load <4 x i8>, ptr %alloc
+  store i8 -1, ptr %alloc
+  ; If the padding bits were not zero, load yields poison value.
+  %val11 = load i25, ptr %alloc
+
+  call void @llvm.lifetime.start.p0(ptr poison)
+  call void @llvm.lifetime.end.p0(ptr poison)
+  %alloc_lifetime = alloca i32
+  ; Load of a dead object yields poison value.
+  %val12 = load i32, ptr %alloc_lifetime
+  call void @llvm.lifetime.start.p0(ptr %alloc_lifetime)
+  ; After lifetime.start, the object is alive but uninitialized.
+  %val13 = load i32, ptr %alloc_lifetime
+  %val14 = load i32, ptr %alloc_lifetime
+  store i32 77, ptr %alloc_lifetime
+  %val15 = load i32, ptr %alloc_lifetime
+  call void @llvm.lifetime.end.p0(ptr %alloc_lifetime)
+  ; Load of a dead object yields poison value.
+  %val16 = load i32, ptr %alloc_lifetime
+
+  store i32 u0xFFF80000, ptr %alloc
+  %val17 = load float, ptr %alloc
+
+  %alloc_vscale = alloca <vscale x 2 x i32>
+  %insert = insertelement <vscale x 1 x i32> poison, i32 1, i32 0
+  %ones = shufflevector <vscale x 1 x i32> %insert, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+  %twos = add <vscale x 1 x i32> %ones, %ones
+  store <vscale x 1 x i32> %ones, ptr %alloc_vscale
+  %gep3 = getelementptr <vscale x 1 x i32>, ptr %alloc_vscale, i64 1
+  store <vscale x 1 x i32> %twos, ptr %gep3
+  %val18 = load <vscale x 2 x i32>, ptr %alloc_vscale
+
+  %alloc_struct = alloca %struct
+  store %struct { [2 x i16] [i16 1, i16 2], i64 3 }, ptr %alloc_struct
+  %val19 = load %struct, ptr %alloc_struct
+  ; Padding bytes of struct are undefined.
+  %val20 = load i64, ptr %alloc_struct
+  %val21 = load i64, ptr %alloc_struct
+
+  %alloc_struct_packed = alloca %struct.packed
+  store %struct.packed <{ [2 x i16] [i16 1, i16 2], i64 3 }>, ptr %alloc_struct_packed
+  %val22 = load %struct.packed, ptr %alloc_struct_packed
+  ; No padding bytes.
+  %val23 = load i64, ptr %alloc_struct_packed
+  %val24 = load i64, ptr %alloc_struct_packed
+
+  %alloc_struct_vscale = alloca %struct.vscale
+  store %struct.vscale zeroinitializer, ptr %alloc_struct_vscale
+  %gep4 = getelementptr <vscale x 1 x i32>, ptr %alloc_struct_vscale, i32 1
+  store <vscale x 1 x i32> %ones, ptr %gep4
+  %val25 = load %struct.vscale, ptr %alloc_struct_vscale
+
+  %alloc_array = alloca [2 x i32]
+  store [2 x i32] [i32 1, i32 2], ptr %alloc_array
+  %val26 = load [2 x i32], ptr %alloc_array
+
+  ret void
+}
+; CHECK: Entering function: main
+; CHECK-NEXT:   %alloc = alloca i32, align 4 => ptr 0x8 [alloc]
+; CHECK-NEXT:   store i32 66051, ptr %alloc, align 4
+; CHECK-NEXT:   %val1 = load i32, ptr %alloc, align 4 => i32 66051
+; CHECK-NEXT:   %val2 = load i32, ptr %alloc, align 2 => i32 66051
+; CHECK-NEXT:   %gep = getelementptr i8, ptr %alloc, i64 1 => ptr 0x9 [alloc + 1]
+; CHECK-NEXT:   %val3 = load i8, ptr %gep, align 1 => i8 1
+; CHECK-NEXT:   %val4 = load <4 x i8>, ptr %alloc, align 4 => { i8 3, i8 2, i8 1, i8 0 }
+; CHECK-NEXT:   store i16 1029, ptr %gep, align 1
+; CHECK-NEXT:   %val5 = load <4 x i8>, ptr %alloc, align 4 => { i8 3, i8 5, i8 4, i8 0 }
+; CHECK-NEXT:   store <2 x i16> <i16 1543, i16 2057>, ptr %alloc, align 4
+; CHECK-NEXT:   %val6 = load <4 x i8>, ptr %alloc, align 4 => { i8 7, i8 6, i8 9, i8 8 }
+; CHECK-NEXT:   %val7 = load <8 x i4>, ptr %alloc, align 4 => { i4 0, i4 7, i4 0, i4 6, i4 0, i4 -7, i4 0, i4 -8 }
+; CHECK-NEXT:   store <3 x i3> <i3 1, i3 2, i3 3>, ptr %alloc, align 2
+; CHECK-NEXT:   %val8 = load <16 x i1>, ptr %alloc, align 2 => { T, F, F, F, F, T, F, F, F, T, F, T, F, F, T, T }
+; CHECK-NEXT:   %val9 = load <16 x i1>, ptr %alloc, align 2 => { F, F, T, F, F, T, F, F, F, T, F, T, F, F, T, T }
+; CHECK-NEXT:   store <8 x i3> <i3 0, i3 1, i3 2, i3 3, i3 -4, i3 -3, i3 -2, i3 -1>, ptr %alloc, align 4
+; CHECK-NEXT:   %val_bitcast = load <3 x i8>, ptr %alloc, align 4 => { i8 5, i8 57, i8 119 }
+; CHECK-NEXT:   store i25 -1, ptr %alloc, align 4
+; CHECK-NEXT:   %val10 = load <4 x i8>, ptr %alloc, align 4 => { i8 -1, i8 -1, i8 -1, i8 1 }
+; CHECK-NEXT:   store i8 -1, ptr %alloc, align 1
+; CHECK-NEXT:   %val11 = load i25, ptr %alloc, align 4 => poison
+; CHECK-NEXT:   call void @llvm.lifetime.start.p0(ptr poison)
+; CHECK-NEXT:   call void @llvm.lifetime.end.p0(ptr poison)
+; CHECK-NEXT:   %alloc_lifetime = alloca i32, align 4 => ptr 0xC [alloc_lifetime]
+; CHECK-NEXT:   %val12 = load i32, ptr %alloc_lifetime, align 4 => poison
+; CHECK-NEXT:   call void @llvm.lifetime.start.p0(ptr %alloc_lifetime)
+; CHECK-NEXT:   %val13 = load i32, ptr %alloc_lifetime, align 4 => i32 -1295355583
+; CHECK-NEXT:   %val14 = load i32, ptr %alloc_lifetime, align 4 => i32 -1809495666
+; CHECK-NEXT:   store i32 77, ptr %alloc_lifetime, align 4
+; CHECK-NEXT:   %val15 = load i32, ptr %alloc_lifetime, align 4 => i32 77
+; CHECK-NEXT:   call void @llvm.lifetime.end.p0(ptr %alloc_lifetime)
+; CHECK-NEXT:   %val16 = load i32, ptr %alloc_lifetime, align 4 => poison
+; CHECK-NEXT:   store i32 -524288, ptr %alloc, align 4
+; CHECK-NEXT:   %val17 = load float, ptr %alloc, align 4 => NaN
+; CHECK-NEXT:   %alloc_vscale = alloca <vscale x 2 x i32>, align 8 => ptr 0x10 [alloc_vscale]
+; CHECK-NEXT:   %insert = insertelement <vscale x 1 x i32> poison, i32 1, i32 0 => { i32 1, poison, poison, poison }
+; CHECK-NEXT:   %ones = shufflevector <vscale x 1 x i32> %insert, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer => { i32 1, i32 1, i32 1, i32 1 }
+; CHECK-NEXT:   %twos = add <vscale x 1 x i32> %ones, %ones => { i32 2, i32 2, i32 2, i32 2 }
+; CHECK-NEXT:   store <vscale x 1 x i32> %ones, ptr %alloc_vscale, align 4
+; CHECK-NEXT:   %gep3 = getelementptr <vscale x 1 x i32>, ptr %alloc_vscale, i64 1 => ptr 0x20 [alloc_vscale + 16]
+; CHECK-NEXT:   store <vscale x 1 x i32> %twos, ptr %gep3, align 4
+; CHECK-NEXT:   %val18 = load <vscale x 2 x i32>, ptr %alloc_vscale, align 8 => { i32 2, i32 2, i32 2, i32 2, i32 1, i32 1, i32 1, i32 1 }
+; CHECK-NEXT:   %alloc_struct = alloca %struct, align 8 => ptr 0x30 [alloc_struct]
+; CHECK-NEXT:   store %struct { [2 x i16] [i16 1, i16 2], i64 3 }, ptr %alloc_struct, align 8
+; CHECK-NEXT:   %val19 = load %struct, ptr %alloc_struct, align 8 => { { i16 1, i16 2 }, i64 3 }
+; CHECK-NEXT:   %val20 = load i64, ptr %alloc_struct, align 8 => i64 281483653031312
+; CHECK-NEXT:   %val21 = load i64, ptr %alloc_struct, align 8 => i64 281487549378445
+; CHECK-NEXT:   %alloc_struct_packed = alloca %struct.packed, align 8 => ptr 0x40 [alloc_struct_packed]
+; CHECK-NEXT:   store %struct.packed <{ [2 x i16] [i16 1, i16 2], i64 3 }>, ptr %alloc_struct_packed, align 1
+; CHECK-NEXT:   %val22 = load %struct.packed, ptr %alloc_struct_packed, align 1 => { { i16 1, i16 2 }, i64 3 }
+; CHECK-NEXT:   %val23 = load i64, ptr %alloc_struct_packed, align 8 => i64 281483566645248
+; CHECK-NEXT:   %val24 = load i64, ptr %alloc_struct_packed, align 8 => i64 281483566645248
+; CHECK-NEXT:   %alloc_struct_vscale = alloca %struct.vscale, align 8 => ptr 0x50 [alloc_struct_vscale]
+; CHECK-NEXT:   store %struct.vscale zeroinitializer, ptr %alloc_struct_vscale, align 4
+; CHECK-NEXT:   %gep4 = getelementptr <vscale x 1 x i32>, ptr %alloc_struct_vscale, i32 1 => ptr 0x60 [alloc_struct_vscale + 16]
+; CHECK-NEXT:   store <vscale x 1 x i32> %ones, ptr %gep4, align 4
+; CHECK-NEXT:   %val25 = load %struct.vscale, ptr %alloc_struct_vscale, align 4 => { { i32 0, i32 0, i32 0, i32 0 }, { i32 1, i32 1, i32 1, i32 1 } }
+; CHECK-NEXT:   %alloc_array = alloca [2 x i32], align 4 => ptr 0x70 [alloc_array]
+; CHECK-NEXT:   store [2 x i32] [i32 1, i32 2], ptr %alloc_array, align 4
+; CHECK-NEXT:   %val26 = load [2 x i32], ptr %alloc_array, align 4 => { i32 1, i32 2 }
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: Exiting function: main
diff --git a/llvm/test/tools/llubi/loadstore_le.ll b/llvm/test/tools/llubi/loadstore_le.ll
new file mode 100644
index 0000000000000..62caea6aceacc
--- /dev/null
+++ b/llvm/test/tools/llubi/loadstore_le.ll
@@ -0,0 +1,155 @@
+; NOTE: Assertions have been autogenerated by utils/update_llubi_test_checks.py UTC_ARGS: --version 6
+; RUN: llubi --verbose < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64"
+
+%struct = type { [2 x i16], i64 }
+%struct.packed = type <{ [2 x i16], i64 }>
+%struct.vscale = type { <vscale x 1 x i32>, <vscale x 1 x i32> }
+
+define void @main() {
+  %alloc = alloca i32
+  store i32 u0x00010203, ptr %alloc
+  %val1 = load i32, ptr %alloc
+  %val2 = load i32, ptr %alloc, align 2
+  %gep = getelementptr i8, ptr %alloc, i64 1
+  %val3 = load i8, ptr %gep
+  %val4 = load <4 x i8>, ptr %alloc
+
+  store i16 u0x0405, ptr %gep, align 1
+  %val5 = load <4 x i8>, ptr %alloc
+  store <2 x i16> <i16 u0x0607, i16 u0x0809>, ptr %alloc
+  %val6 = load <4 x i8>, ptr %alloc
+  %val7 = load <8 x i4>, ptr %alloc
+  store <3 x i3> <i3 1, i3 2, i3 3>, ptr %alloc
+  ; padding bits are undefined.
+  %val8 = load <16 x i1>, ptr %alloc
+  %val9 = load <16 x i1>, ptr %alloc
+  store <8 x i3> <i3 0, i3 1, i3 2, i3 3, i3 4, i3 5, i3 6, i3 7>, ptr %alloc
+  %val_bitcast = load <3 x i8>, ptr %alloc
+
+  ; For non-byte-sized types, the padding bits must be zero.
+  store i25 -1, ptr %alloc
+  %val10 = load <4 x i8>, ptr %alloc
+  %gep2 = getelementptr i8, ptr %alloc, i64 3
+  store i8 -1, ptr %gep2
+  ; If the padding bits were not zero, load yields poison value.
+  %val11 = load i25, ptr %alloc
+
+  call void @llvm.lifetime.start.p0(ptr poison)
+  call void @llvm.lifetime.end.p0(ptr poison)
+  %alloc_lifetime = alloca i32
+  ; Load of a dead object yields poison value.
+  %val12 = load i32, ptr %alloc_lifetime
+  call void @llvm.lifetime.start.p0(ptr %alloc_lifetime)
+  ; After lifetime.start, the object is alive but uninitialized.
+  %val13 = load i32, ptr %alloc_lifetime
+  %val14 = load i32, ptr %alloc_lifetime
+  store i32 77, ptr %alloc_lifetime
+  %val15 = load i32, ptr %alloc_lifetime
+  call void @llvm.lifetime.end.p0(ptr %alloc_lifetime)
+  ; Load of a dead object yields poison value.
+  %val16 = load i32, ptr %alloc_lifetime
+
+  store i32 u0xFFF80000, ptr %alloc
+  %val17 = load float, ptr %alloc
+
+  %alloc_vscale = alloca <vscale x 2 x i32>
+  %insert = insertelement <vscale x 1 x i32> poison, i32 1, i32 0
+  %ones = shufflevector <vscale x 1 x i32> %insert, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+  %twos = add <vscale x 1 x i32> %ones, %ones
+  store <vscale x 1 x i32> %ones, ptr %alloc_vscale
+  %gep3 = getelementptr <vscale x 1 x i32>, ptr %alloc_vscale, i64 1
+  store <vscale x 1 x i32> %twos, ptr %gep3
+  %val18 = load <vscale x 2 x i32>, ptr %alloc_vscale
+
+  %alloc_struct = alloca %struct
+  store %struct { [2 x i16] [i16 1, i16 2], i64 3 }, ptr %alloc_struct
+  %val19 = load %struct, ptr %alloc_struct
+  ; Padding bytes of struct are undefined.
+  %val20 = load i64, ptr %alloc_struct
+  %val21 = load i64, ptr %alloc_struct
+
+  %alloc_struct_packed = alloca %struct.packed
+  store %struct.packed <{ [2 x i16] [i16 1, i16 2], i64 3 }>, ptr %alloc_struct_packed
+  %val22 = load %struct.packed, ptr %alloc_struct_packed
+  ; No padding bytes.
+  %val23 = load i64, ptr %alloc_struct_packed
+  %val24 = load i64, ptr %alloc_struct_packed
+
+  %alloc_struct_vscale = alloca %struct.vscale
+  store %struct.vscale zeroinitializer, ptr %alloc_struct_vscale
+  %gep4 = getelementptr <vscale x 1 x i32>, ptr %alloc_struct_vscale, i32 1
+  store <vscale x 1 x i32> %ones, ptr %gep4
+  %val25 = load %struct.vscale, ptr %alloc_struct_vscale
+
+  %alloc_array = alloca [2 x i32]
+  store [2 x i32] [i32 1, i32 2], ptr %alloc_array
+  %val26 = load [2 x i32], ptr %alloc_array
+
+  ret void
+}
+; CHECK: Entering function: main
+; CHECK-NEXT:   %alloc = alloca i32, align 4 => ptr 0x8 [alloc]
+; CHECK-NEXT:   store i32 66051, ptr %alloc, align 4
+; CHECK-NEXT:   %val1 = load i32, ptr %alloc, align 4 => i32 66051
+; CHECK-NEXT:   %val2 = load i32, ptr %alloc, align 2 => i32 66051
+; CHECK-NEXT:   %gep = getelementptr i8, ptr %alloc, i64 1 => ptr 0x9 [alloc + 1]
+; CHECK-NEXT:   %val3 = load i8, ptr %gep, align 1 => i8 2
+; CHECK-NEXT:   %val4 = load <4 x i8>, ptr %alloc, align 4 => { i8 3, i8 2, i8 1, i8 0 }
+; CHECK-NEXT:   store i16 1029, ptr %gep, align 1
+; CHECK-NEXT:   %val5 = load <4 x i8>, ptr %alloc, align 4 => { i8 3, i8 5, i8 4, i8 0 }
+; CHECK-NEXT:   store <2 x i16> <i16 1543, i16 2057>, ptr %alloc, align 4
+; CHECK-NEXT:   %val6 = load <4 x i8>, ptr %alloc, align 4 => { i8 7, i8 6, i8 9, i8 8 }
+; CHECK-NEXT:   %val7 = load <8 x i4>, ptr %alloc, align 4 => { i4 7, i4 0, i4 6, i4 0, i4 -7, i4 0, i4 -8, i4 0 }
+; CHECK-NEXT:   store <3 x i3> <i3 1, i3 2, i3 3>, ptr %alloc, align 2
+; CHECK-NEXT:   %val8 = load <16 x i1>, ptr %alloc, align 2 => { T, F, F, F, T, F, T, T, F, F, T, F, F, F, F, T }
+; CHECK-NEXT:   %val9 = load <16 x i1>, ptr %alloc, align 2 => { T, F, F, F, T, F, T, T, F, F, T, F, F, T, F, F }
+; CHECK-NEXT:   store <8 x i3> <i3 0, i3 1, i3 2, i3 3, i3 -4, i3 -3, i3 -2, i3 -1>, ptr %alloc, align 4
+; CHECK-NEXT:   %val_bitcast = load <3 x i8>, ptr %alloc, align 4 => { i8 -120, i8 -58, i8 -6 }
+; CHECK-NEXT:   store i25 -1, ptr %alloc, align 4
+; CHECK-NEXT:   %val10 = load <4 x i8>, ptr %alloc, align 4 => { i8 -1, i8 -1, i8 -1, i8 1 }
+; CHECK-NEXT:   %gep2 = getelementptr i8, ptr %alloc, i64 3 => ptr 0xB [alloc + 3]
+; CHECK-NEXT:   store i8 -1, ptr %gep2, align 1
+; CHECK-NEXT:   %val11 = load i25, ptr %alloc, align 4 => poison
+; CHECK-NEXT:   call void @llvm.lifetime.start.p0(ptr poison)
+; CHECK-NEXT:   call void @llvm.lifetime.end.p0(ptr poison)
+; CHECK-NEXT:   %alloc_lifetime = alloca i32, align 4 => ptr 0xC [alloc_lifetime]
+; CHECK-NEXT:   %val12 = load i32, ptr %alloc_lifetime, align 4 => poison
+; CHECK-NEXT:   call void @llvm.lifetime.start.p0(ptr %alloc_lifetime)
+; CHECK-NEXT:   %val13 = load i32, ptr %alloc_lifetime, align 4 => i32 -1295355583
+; CHECK-NEXT:   %val14 = load i32, ptr %alloc_lifetime, align 4 => i32 -1809495666
+; CHECK-NEXT:   store i32 77, ptr %alloc_lifetime, align 4
+; CHECK-NEXT:   %val15 = load i32, ptr %alloc_lifetime, align 4 => i32 77
+; CHECK-NEXT:   call void @llvm.lifetime.end.p0(ptr %alloc_lifetime)
+; CHECK-NEXT:   %val16 = load i32, ptr %alloc_lifetime, align 4 => poison
+; CHECK-NEXT:   store i32 -524288, ptr %alloc, align 4
+; CHECK-NEXT:   %val17 = load float, ptr %alloc, align 4 => NaN
+; CHECK-NEXT:   %alloc_vscale = alloca <vscale x 2 x i32>, align 8 => ptr 0x10 [alloc_vscale]
+; CHECK-NEXT:   %insert = insertelement <vscale x 1 x i32> poison, i32 1, i32 0 => { i32 1, poison, poison, poison }
+; CHECK-NEXT:   %ones = shufflevector <vscale x 1 x i32> %insert, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer => { i32 1, i32 1, i32 1, i32 1 }
+; CHECK-NEXT:   %twos = add <vscale x 1 x i32> %ones, %ones => { i32 2, i32 2, i32 2, i32 2 }
+; CHECK-NEXT:   store <vscale x 1 x i32> %ones, ptr %alloc_vscale, align 4
+; CHECK-NEXT:   %gep3 = getelementptr <vscale x 1 x i32>, ptr %alloc_vscale, i64 1 => ptr 0x20 [alloc_vscale + 16]
+; CHECK-NEXT:   store <vscale x 1 x i32> %twos, ptr %gep3, align 4
+; CHECK-NEXT:   %val18 = load <vscale x 2 x i32>, ptr %alloc_vscale, align 8 => { i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2 }
+; CHECK-NEXT:   %alloc_struct = alloca %struct, align 8 => ptr 0x30 [alloc_struct]
+; CHECK-NEXT:   store %struct { [2 x i16] [i16 1, i16 2], i64 3 }, ptr %alloc_struct, align 8
+; CHECK-NEXT:   %val19 = load %struct, ptr %alloc_struct, align 8 => { { i16 1, i16 2 }, i64 3 }
+; CHECK-NEXT:   %val20 = load i64, ptr %alloc_struct, align 8 => i64 371025319710294017
+; CHECK-NEXT:   %val21 = load i64, ptr %alloc_struct, align 8 => i64 -1341035243900895231
+; CHECK-NEXT:   %alloc_struct_packed = alloca %struct.packed, align 8 => ptr 0x40 [alloc_struct_packed]
+; CHECK-NEXT:   store %struct.packed <{ [2 x i16] [i16 1, i16 2], i64 3 }>, ptr %alloc_struct_packed, align 1
+; CHECK-NEXT:   %val22 = load %struct.packed, ptr %alloc_struct_packed, align 1 => { { i16 1, i16 2 }, i64 3 }
+; CHECK-NEXT:   %val23 = load i64, ptr %alloc_struct_packed, align 8 => i64 12885032961
+; CHECK-NEXT:   %val24 = load i64, ptr %alloc_struct_packed, align 8 => i64 12885032961
+; CHECK-NEXT:   %alloc_struct_vscale = alloca %struct.vscale, align 8 => ptr 0x50 [alloc_struct_vscale]
+; CHECK-NEXT:   store %struct.vscale zeroinitializer, ptr %alloc_struct_vscale, align 4
+; CHECK-NEXT:   %gep4 = getelementptr <vscale x 1 x i32>, ptr %alloc_struct_vscale, i32 1 => ptr 0x60 [alloc_struct_vscale + 16]
+; CHECK-NEXT:   store <vscale x 1 x i32> %ones, ptr %gep4, align 4
+; CHECK-NEXT:   %val25 = load %struct.vscale, ptr %alloc_struct_vscale, align 4 => { { i32 0, i32 0, i32 0, i32 0 }, { i32 1, i32 1, i32 1, i32 1 } }
+; CHECK-NEXT:   %alloc_array = alloca [2 x i32], align 4 => ptr 0x70 [alloc_array]
+; CHECK-NEXT:   store [2 x i32] [i32 1, i32 2], ptr %alloc_array, align 4
+; CHECK-NEXT:   %val26 = load [2 x i32], ptr %alloc_array, align 4 => { i32 1, i32 2 }
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: Exiting function: main
diff --git a/llvm/test/tools/llubi/loadstore_misaligned.ll b/llvm/test/tools/llubi/loadstore_misaligned.ll
new file mode 100644
index 0000000000000..cc80639d773bd
--- /dev/null
+++ b/llvm/test/tools/llubi/loadstore_misaligned.ll
@@ -0,0 +1,14 @@
+; RUN: sed 's/OP1/store i32 0/g' %s | not llubi --verbose 2>&1 | FileCheck %s
+; RUN: sed 's/OP1/%res = load i32/g' %s | not llubi --verbose 2>&1 | FileCheck %s
+
+define void @main() {
+  %alloc = alloca [2 x i32], align 8
+  %gep = getelementptr inbounds [2 x i32], ptr %alloc, i64 0, i64 1
+  OP1, ptr %gep, align 8
+  ret void
+}
+; CHECK: Entering function: main
+; CHECK-NEXT: %alloc = alloca [2 x i32], align 8 => ptr 0x8 [alloc]
+; CHECK-NEXT: %gep = getelementptr inbounds [2 x i32], ptr %alloc, i64 0, i64 1 => ptr 0xC [alloc + 4]
+; CHECK-NEXT: Immediate UB detected: Misaligned memory access.
+; CHECK-NEXT: error: Execution of function 'main' failed.
diff --git a/llvm/test/tools/llubi/loadstore_null.ll b/llvm/test/tools/llubi/loadstore_null.ll
new file mode 100644
index 0000000000000..7a243784e6bd6
--- /dev/null
+++ b/llvm/test/tools/llubi/loadstore_null.ll
@@ -0,0 +1,10 @@
+; RUN: sed 's/OP1/store i32 0/g' %s | not llubi --verbose 2>&1 | FileCheck %s
+; RUN: sed 's/OP1/%res = load i32/g' %s | not llubi --verbose 2>&1 | FileCheck %s
+
+define void @main() {
+  OP1, ptr null
+  ret void
+}
+; CHECK: Entering function: main
+; CHECK-NEXT: Immediate UB detected: Invalid memory access via a pointer with nullary provenance.
+; CHECK-NEXT: error: Execution of function 'main' failed.
diff --git a/llvm/test/tools/llubi/loadstore_oob1.ll b/llvm/test/tools/llubi/loadstore_oob1.ll
new file mode 100644
index 0000000000000..0618faa96bfde
--- /dev/null
+++ b/llvm/test/tools/llubi/loadstore_oob1.ll
@@ -0,0 +1,14 @@
+; RUN: sed 's/OP1/store i32 0/g' %s | not llubi --verbose 2>&1 | FileCheck %s
+; RUN: sed 's/OP1/%res = load i32/g' %s | not llubi --verbose 2>&1 | FileCheck %s
+
+define void @main() {
+  %alloc = alloca [2 x i32]
+  %gep = getelementptr inbounds [2 x i32], ptr %alloc, i64 0, i64 2
+  OP1, ptr %gep
+  ret void
+}
+; CHECK: Entering function: main
+; CHECK-NEXT: %alloc = alloca [2 x i32], align 4 => ptr 0x8 [alloc]
+; CHECK-NEXT: %gep = getelementptr inbounds [2 x i32], ptr %alloc, i64 0, i64 2 => ptr 0x10 [alloc + 8]
+; CHECK-NEXT: Immediate UB detected: Memory access is out of bounds.
+; CHECK-NEXT: error: Execution of function 'main' failed.
diff --git a/llvm/test/tools/llubi/loadstore_poison.ll b/llvm/test/tools/llubi/loadstore_poison.ll
new file mode 100644
index 0000000000000..44c72aa803709
--- /dev/null
+++ b/llvm/test/tools/llubi/loadstore_poison.ll
@@ -0,0 +1,10 @@
+; RUN: sed 's/OP1/store i32 0/g' %s | not llubi --verbose 2>&1 | FileCheck %s
+; RUN: sed 's/OP1/%res = load i32/g' %s | not llubi --verbose 2>&1 | FileCheck %s
+
+define void @main() {
+  OP1, ptr poison
+  ret void
+}
+; CHECK: Entering function: main
+; CHECK-NEXT: Immediate UB detected: Invalid memory access with a poison pointer.
+; CHECK-NEXT: error: Execution of function 'main' failed.
diff --git a/llvm/test/tools/llubi/loadstore_uaf.ll b/llvm/test/tools/llubi/loadstore_uaf.ll
new file mode 100644
index 0000000000000..21b86552275b6
--- /dev/null
+++ b/llvm/test/tools/llubi/loadstore_uaf.ll
@@ -0,0 +1,21 @@
+; RUN: sed 's/OP1/store i32 0/g' %s | not llubi --verbose 2>&1 | FileCheck %s
+; RUN: sed 's/OP1/%res = load i32/g' %s | not llubi --verbose 2>&1 | FileCheck %s
+
+define ptr @stack_object() {
+  %alloc = alloca i32
+  ret ptr %alloc
+}
+
+define void @main() {
+  %alloc = call ptr @stack_object()
+  OP1, ptr %alloc
+  ret void
+}
+; CHECK: Entering function: main
+; CHECK-NEXT: Entering function: stack_object
+; CHECK-NEXT:   %alloc = alloca i32, align 4 => ptr 0x8 [alloc]
+; CHECK-NEXT:   ret ptr %alloc
+; CHECK-NEXT: Exiting function: stack_object
+; CHECK-NEXT:   %alloc = call ptr @stack_object() => ptr 0x8 [dangling]
+; CHECK-NEXT: Immediate UB detected: Try to access a dead memory object.
+; CHECK-NEXT: error: Execution of function 'main' failed.
diff --git a/llvm/test/tools/llubi/store_dead.ll b/llvm/test/tools/llubi/store_dead.ll
new file mode 100644
index 0000000000000..8bd15beefb8d3
--- /dev/null
+++ b/llvm/test/tools/llubi/store_dead.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llubi_test_checks.py UTC_ARGS: --version 6
+; RUN: not llubi --verbose < %s 2>&1 | FileCheck %s
+
+define void @main() {
+  %alloc = alloca i32
+  call void @llvm.lifetime.start.p0(ptr %alloc)
+  store i32 0, ptr %alloc
+  call void @llvm.lifetime.end.p0(ptr %alloc)
+  store i32 0, ptr %alloc
+  ret void
+}
+; CHECK: Entering function: main
+; CHECK-NEXT:   %alloc = alloca i32, align 4 => ptr 0x8 [alloc]
+; CHECK-NEXT:   call void @llvm.lifetime.start.p0(ptr %alloc)
+; CHECK-NEXT:   store i32 0, ptr %alloc, align 4
+; CHECK-NEXT:   call void @llvm.lifetime.end.p0(ptr %alloc)
+; CHECK-NEXT: Immediate UB detected: Try to access a dead memory object.
+; CHECK-NEXT: error: Execution of function 'main' failed.
diff --git a/llvm/tools/llubi/lib/Context.cpp b/llvm/tools/llubi/lib/Context.cpp
index adedfbcc3886b..1e7b0dd723dc2 100644
--- a/llvm/tools/llubi/lib/Context.cpp
+++ b/llvm/tools/llubi/lib/Context.cpp
@@ -103,6 +103,267 @@ const AnyValue &Context::getConstantValue(Constant *C) {
   return ConstCache.emplace(C, getConstantValueImpl(C)).first->second;
 }
 
+AnyValue Context::fromBytes(ArrayRef<Byte> Bytes, Type *Ty,
+                            uint32_t &OffsetInBits, bool CheckPaddingBits) {
+  if (Ty->isIntegerTy() || Ty->isFloatingPointTy() || Ty->isPointerTy()) {
+    uint32_t NumBits = DL.getTypeSizeInBits(Ty).getFixedValue();
+    uint32_t NewOffsetInBits = OffsetInBits + NumBits;
+    if (CheckPaddingBits)
+      NewOffsetInBits = alignTo(NewOffsetInBits, 8);
+    bool NeedsPadding = NewOffsetInBits != OffsetInBits + NumBits;
+    uint32_t NumBitsToExtract = NewOffsetInBits - OffsetInBits;
+    SmallVector<uint64_t> BitsData(alignTo(NumBitsToExtract, 8));
+    for (uint32_t I = 0; I < NumBitsToExtract; I += 8) {
+      uint32_t NumBitsInByte = std::min(8U, NumBitsToExtract - I);
+      uint32_t BitsStart =
+          OffsetInBits +
+          (DL.isLittleEndian() ? I : (NumBitsToExtract - NumBitsInByte - I));
+      uint32_t BitsEnd = BitsStart + NumBitsInByte - 1;
+      Byte LogicalByte;
+      if (((BitsStart ^ BitsEnd) & ~7) == 0)
+        LogicalByte = Bytes[BitsStart / 8].lshr(BitsStart % 8);
+      else
+        LogicalByte =
+            Bytes[BitsStart / 8].fshr(Bytes[BitsEnd / 8], BitsStart % 8);
+
+      uint32_t Mask = (1U << NumBitsInByte) - 1;
+      // If any of the bits in the byte is poison, the whole value is poison.
+      if (~LogicalByte.ConcreteMask & ~LogicalByte.Value & Mask)
+        return AnyValue::poison();
+      uint8_t RandomBits = 0;
+      if (UndefBehavior == UndefValueBehavior::NonDeterministic &&
+          (~LogicalByte.ConcreteMask & Mask)) {
+        // This byte contains undef bits.
+        std::uniform_int_distribution<uint8_t> Distrib;
+        RandomBits = Distrib(Rng);
+      }
+      uint8_t ActualBits = ((LogicalByte.Value & LogicalByte.ConcreteMask) |
+                            (RandomBits & ~LogicalByte.ConcreteMask)) &
+                           Mask;
+      BitsData[I / 64] |= static_cast<APInt::WordType>(ActualBits) << (I % 64);
+    }
+    OffsetInBits = NewOffsetInBits;
+
+    APInt Bits(NumBitsToExtract, BitsData);
+
+    // Padding bits for non-byte-sized scalar types must be zero.
+    if (NeedsPadding) {
+      if (!Bits.isIntN(NumBits))
+        return AnyValue::poison();
+      Bits = Bits.trunc(NumBits);
+    }
+
+    if (Ty->isIntegerTy())
+      return Bits;
+    if (Ty->isFloatingPointTy())
+      return APFloat(Ty->getFltSemantics(), Bits);
+    assert(Ty->isPointerTy() && "Expect a pointer type");
+    // TODO: recover provenance
+    return Pointer(Bits);
+  }
+
+  assert(OffsetInBits % 8 == 0 && "Missing padding bits.");
+  if (auto *VecTy = dyn_cast<VectorType>(Ty)) {
+    Type *ElemTy = VecTy->getElementType();
+    std::vector<AnyValue> ValVec;
+    uint32_t NumElements = getEVL(VecTy->getElementCount());
+    ValVec.reserve(NumElements);
+    for (uint32_t I = 0; I != NumElements; ++I)
+      ValVec.push_back(
+          fromBytes(Bytes, ElemTy, OffsetInBits, /*CheckPaddingBits=*/false));
+    if (DL.isBigEndian())
+      std::reverse(ValVec.begin(), ValVec.end());
+    return AnyValue(std::move(ValVec));
+  }
+  if (auto *ArrTy = dyn_cast<ArrayType>(Ty)) {
+    Type *ElemTy = ArrTy->getElementType();
+    std::vector<AnyValue> ValVec;
+    uint32_t NumElements = ArrTy->getNumElements();
+    ValVec.reserve(NumElements);
+    for (uint32_t I = 0; I != NumElements; ++I)
+      ValVec.push_back(
+          fromBytes(Bytes, ElemTy, OffsetInBits, /*CheckPaddingBits=*/true));
+    return AnyValue(std::move(ValVec));
+  }
+  if (auto *StructTy = dyn_cast<StructType>(Ty)) {
+    auto *Layout = DL.getStructLayout(StructTy);
+    uint32_t BaseOffsetInBits = OffsetInBits;
+    std::vector<AnyValue> ValVec;
+    uint32_t NumElements = StructTy->getNumElements();
+    ValVec.reserve(NumElements);
+    for (uint32_t I = 0; I != NumElements; ++I) {
+      Type *ElemTy = StructTy->getElementType(I);
+      TypeSize ElemOffset = Layout->getElementOffset(I);
+      OffsetInBits =
+          BaseOffsetInBits + (ElemOffset.isScalable()
+                                  ? ElemOffset.getKnownMinValue() * VScale
+                                  : ElemOffset.getFixedValue()) *
+                                 8;
+      ValVec.push_back(
+          fromBytes(Bytes, ElemTy, OffsetInBits, /*CheckPaddingBits=*/true));
+    }
+    OffsetInBits =
+        BaseOffsetInBits +
+        static_cast<uint32_t>(getEffectiveTypeStoreSize(StructTy)) * 8;
+    return AnyValue(std::move(ValVec));
+  }
+  llvm_unreachable("Unsupported first class type.");
+}
+
+void Context::toBytes(const AnyValue &Val, Type *Ty, uint32_t &OffsetInBits,
+                      MutableArrayRef<Byte> Bytes, bool PaddingBits) {
+  if (Val.isPoison() || Ty->isIntegerTy() || Ty->isFloatingPointTy() ||
+      Ty->isPointerTy()) {
+    uint32_t NumBits = DL.getTypeSizeInBits(Ty).getFixedValue();
+    uint32_t NewOffsetInBits = OffsetInBits + NumBits;
+    if (PaddingBits)
+      NewOffsetInBits = alignTo(NewOffsetInBits, 8);
+    bool NeedsPadding = NewOffsetInBits != OffsetInBits + NumBits;
+    auto WriteBits = [&](const APInt &Bits) {
+      for (uint32_t I = 0, E = Bits.getBitWidth(); I < E; I += 8) {
+        uint32_t NumBitsInByte = std::min(8U, E - I);
+        uint32_t BitsStart =
+            OffsetInBits + (DL.isLittleEndian() ? I : (E - NumBitsInByte - I));
+        uint32_t BitsEnd = BitsStart + NumBitsInByte - 1;
+        uint8_t BitsVal =
+            static_cast<uint8_t>(Bits.extractBitsAsZExtValue(NumBitsInByte, I));
+
+        Bytes[BitsStart / 8].writeBits(
+            static_cast<uint8_t>(((1U << NumBitsInByte) - 1)
+                                 << (BitsStart % 8)),
+            static_cast<uint8_t>(BitsVal << (BitsStart % 8)));
+        // Crosses the byte boundary.
+        if (((BitsStart ^ BitsEnd) & ~7) != 0)
+          Bytes[BitsEnd / 8].writeBits(
+              static_cast<uint8_t>((1U << (BitsEnd % 8 + 1)) - 1),
+              static_cast<uint8_t>(BitsVal >> (8 - (BitsStart % 8))));
+      }
+    };
+    if (Val.isPoison()) {
+      for (uint32_t I = 0, E = NewOffsetInBits - OffsetInBits; I < E;) {
+        uint32_t NumBitsInByte = std::min(8 - (OffsetInBits + I) % 8, E - I);
+        assert(((OffsetInBits ^ (OffsetInBits + NumBitsInByte - 1)) & ~7) ==
+                   0 &&
+               "Across byte boundary.");
+        Bytes[(OffsetInBits + I) / 8].poisonBits(static_cast<uint8_t>(
+            ((1U << NumBitsInByte) - 1) << ((OffsetInBits + I) % 8)));
+        I += NumBitsInByte;
+      }
+    } else if (Ty->isIntegerTy()) {
+      auto &Bits = Val.asInteger();
+      WriteBits(NeedsPadding ? Bits.zext(NewOffsetInBits - OffsetInBits)
+                             : Bits);
+    } else if (Ty->isFloatingPointTy()) {
+      auto Bits = Val.asFloat().bitcastToAPInt();
+      WriteBits(NeedsPadding ? Bits.zext(NewOffsetInBits - OffsetInBits)
+                             : Bits);
+    } else if (Ty->isPointerTy()) {
+      auto &Bits = Val.asPointer().address();
+      WriteBits(NeedsPadding ? Bits.zext(NewOffsetInBits - OffsetInBits)
+                             : Bits);
+      // TODO: save metadata of the pointer.
+    } else {
+      llvm_unreachable("Unsupported scalar type.");
+    }
+    OffsetInBits = NewOffsetInBits;
+    return;
+  }
+
+  assert(OffsetInBits % 8 == 0 && "Missing padding bits.");
+  if (auto *VecTy = dyn_cast<VectorType>(Ty)) {
+    Type *ElemTy = VecTy->getElementType();
+    auto &ValVec = Val.asAggregate();
+    uint32_t NewOffsetInBits =
+        alignTo(OffsetInBits + DL.getTypeSizeInBits(ElemTy).getFixedValue() *
+                                   ValVec.size(),
+                8);
+    if (DL.isLittleEndian()) {
+      for (const auto &SubVal : ValVec)
+        toBytes(SubVal, ElemTy, OffsetInBits, Bytes,
+                /*PaddingBits=*/false);
+    } else {
+      for (const auto &SubVal : reverse(ValVec))
+        toBytes(SubVal, ElemTy, OffsetInBits, Bytes,
+                /*PaddingBits=*/false);
+    }
+    if (NewOffsetInBits != OffsetInBits) {
+      assert(OffsetInBits % 8 != 0 && NewOffsetInBits - OffsetInBits < 8 &&
+             "Unexpected offset.");
+      // Fill remaining bits with undef.
+      Bytes[OffsetInBits / 8].undefBits(
+          static_cast<uint8_t>(~0U << (OffsetInBits % 8)));
+    }
+    OffsetInBits = NewOffsetInBits;
+    return;
+  }
+  if (auto *ArrTy = dyn_cast<ArrayType>(Ty)) {
+    Type *ElemTy = ArrTy->getElementType();
+    for (const auto &SubVal : Val.asAggregate())
+      toBytes(SubVal, ElemTy, OffsetInBits, Bytes, /*PaddingBits=*/true);
+    return;
+  }
+  if (auto *StructTy = dyn_cast<StructType>(Ty)) {
+    auto *Layout = DL.getStructLayout(StructTy);
+    uint32_t BaseOffsetInBits = OffsetInBits;
+    auto FillUndefBytes = [&](uint32_t NewOffsetInBits) {
+      if (OffsetInBits == NewOffsetInBits)
+        return;
+      // Fill padding bits due to alignment requirement.
+      assert(NewOffsetInBits > OffsetInBits &&
+             "Unexpected negative padding bits!");
+      fill(Bytes.slice(OffsetInBits / 8, (NewOffsetInBits - OffsetInBits) / 8),
+           Byte::undef());
+      OffsetInBits = NewOffsetInBits;
+    };
+    for (uint32_t I = 0, E = Val.asAggregate().size(); I != E; ++I) {
+      Type *ElemTy = StructTy->getElementType(I);
+      TypeSize ElemOffset = Layout->getElementOffset(I);
+      uint32_t NewOffsetInBits =
+          BaseOffsetInBits + (ElemOffset.isScalable()
+                                  ? ElemOffset.getKnownMinValue() * VScale
+                                  : ElemOffset.getFixedValue()) *
+                                 8;
+      FillUndefBytes(NewOffsetInBits);
+      toBytes(Val.asAggregate()[I], ElemTy, OffsetInBits, Bytes,
+              /*PaddingBits=*/true);
+    }
+    uint32_t NewOffsetInBits =
+        BaseOffsetInBits + getEffectiveTypeStoreSize(StructTy) * 8;
+    FillUndefBytes(NewOffsetInBits);
+    return;
+  }
+
+  llvm_unreachable("Unsupported first class type.");
+}
+
+AnyValue Context::fromBytes(ArrayRef<Byte> Bytes, Type *Ty) {
+  uint32_t OffsetInBits = 0;
+  return fromBytes(Bytes, Ty, OffsetInBits, /*CheckPaddingBits=*/true);
+}
+
+void Context::toBytes(const AnyValue &Val, Type *Ty,
+                      MutableArrayRef<Byte> Bytes) {
+  uint32_t OffsetInBits = 0;
+  toBytes(Val, Ty, OffsetInBits, Bytes, /*PaddingBits=*/true);
+}
+
+AnyValue Context::load(MemoryObject &MO, uint64_t Offset, Type *ValTy) {
+  return fromBytes(
+      MO.getBytes().slice(Offset, getEffectiveTypeStoreSize(ValTy)), ValTy);
+}
+
+void Context::store(MemoryObject &MO, uint64_t Offset, const AnyValue &Val,
+                    Type *ValTy) {
+  toBytes(Val, ValTy,
+          MO.getBytes().slice(Offset, getEffectiveTypeStoreSize(ValTy)));
+}
+
+void Context::storeRawBytes(MemoryObject &MO, uint64_t Offset, const void *Data,
+                            uint64_t Size) {
+  for (uint64_t I = 0; I != Size; ++I)
+    MO[Offset + I] = Byte::concrete(static_cast<const uint8_t *>(Data)[I]);
+}
+
 MemoryObject::~MemoryObject() = default;
 MemoryObject::MemoryObject(uint64_t Addr, uint64_t Size, StringRef Name,
                            unsigned AS, MemInitKind InitKind)
@@ -111,13 +372,13 @@ MemoryObject::MemoryObject(uint64_t Addr, uint64_t Size, StringRef Name,
                                               : MemoryObjectState::Dead) {
   switch (InitKind) {
   case MemInitKind::Zeroed:
-    Bytes.resize(Size, Byte{0, ByteKind::Concrete});
+    Bytes.resize(Size, Byte::concrete(0));
     break;
   case MemInitKind::Uninitialized:
-    Bytes.resize(Size, Byte{0, ByteKind::Undef});
+    Bytes.resize(Size, Byte::undef());
     break;
   case MemInitKind::Poisoned:
-    Bytes.resize(Size, Byte{0, ByteKind::Poison});
+    Bytes.resize(Size, Byte::poison());
     break;
   }
 }
@@ -175,38 +436,22 @@ BasicBlock *Context::getTargetBlock(const Pointer &Ptr) {
   return It->second.first;
 }
 
-void MemoryObject::markAsFreed() {
-  State = MemoryObjectState::Freed;
-  Bytes.clear();
+uint64_t Context::getEffectiveTypeAllocSize(Type *Ty) {
+  TypeSize Size = DL.getTypeAllocSize(Ty);
+  if (Size.isScalable())
+    return Size.getKnownMinValue() * VScale;
+  return Size.getFixedValue();
 }
-
-void MemoryObject::writeRawBytes(uint64_t Offset, const void *Data,
-                                 uint64_t Length) {
-  assert(SaturatingAdd(Offset, Length) <= Size && "Write out of bounds");
-  const uint8_t *ByteData = static_cast<const uint8_t *>(Data);
-  for (uint64_t I = 0; I < Length; ++I)
-    Bytes[Offset + I].set(ByteData[I]);
+uint64_t Context::getEffectiveTypeStoreSize(Type *Ty) {
+  TypeSize Size = DL.getTypeStoreSize(Ty);
+  if (Size.isScalable())
+    return Size.getKnownMinValue() * VScale;
+  return Size.getFixedValue();
 }
 
-void MemoryObject::writeInteger(uint64_t Offset, const APInt &Int,
-                                const DataLayout &DL) {
-  uint64_t BitWidth = Int.getBitWidth();
-  uint64_t IntSize = divideCeil(BitWidth, 8);
-  assert(SaturatingAdd(Offset, IntSize) <= Size && "Write out of bounds");
-  for (uint64_t I = 0; I < IntSize; ++I) {
-    uint64_t ByteIndex = DL.isLittleEndian() ? I : (IntSize - 1 - I);
-    uint64_t Bits = std::min(BitWidth - ByteIndex * 8, uint64_t(8));
-    Bytes[Offset + I].set(Int.extractBitsAsZExtValue(Bits, ByteIndex * 8));
-  }
-}
-void MemoryObject::writeFloat(uint64_t Offset, const APFloat &Float,
-                              const DataLayout &DL) {
-  writeInteger(Offset, Float.bitcastToAPInt(), DL);
-}
-void MemoryObject::writePointer(uint64_t Offset, const Pointer &Ptr,
-                                const DataLayout &DL) {
-  writeInteger(Offset, Ptr.address(), DL);
-  // TODO: provenance
+void MemoryObject::markAsFreed() {
+  State = MemoryObjectState::Freed;
+  Bytes.clear();
 }
 
 } // namespace llvm::ubi
diff --git a/llvm/tools/llubi/lib/Context.h b/llvm/tools/llubi/lib/Context.h
index 25ba940323ebf..625b214391c8d 100644
--- a/llvm/tools/llubi/lib/Context.h
+++ b/llvm/tools/llubi/lib/Context.h
@@ -14,6 +14,7 @@
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/Module.h"
 #include <map>
+#include <random>
 
 namespace llvm::ubi {
 
@@ -41,6 +42,11 @@ enum class MemoryObjectState {
   Freed,
 };
 
+enum class UndefValueBehavior {
+  NonDeterministic, // Each use of the undef value can yield different results.
+  Zero,             // All uses of the undef value yield zero.
+};
+
 class MemoryObject : public RefCountedBase<MemoryObject> {
   uint64_t Address;
   uint64_t Size;
@@ -65,6 +71,7 @@ class MemoryObject : public RefCountedBase<MemoryObject> {
   StringRef getName() const { return Name; }
   unsigned getAddressSpace() const { return AS; }
   MemoryObjectState getState() const { return State; }
+  void setState(MemoryObjectState S) { State = S; }
   bool isConstant() const { return IsConstant; }
   void setIsConstant(bool C) { IsConstant = C; }
 
@@ -76,10 +83,8 @@ class MemoryObject : public RefCountedBase<MemoryObject> {
     assert(Offset < Size && "Offset out of bounds");
     return Bytes[Offset];
   }
-  void writeRawBytes(uint64_t Offset, const void *Data, uint64_t Length);
-  void writeInteger(uint64_t Offset, const APInt &Int, const DataLayout &DL);
-  void writeFloat(uint64_t Offset, const APFloat &Float, const DataLayout &DL);
-  void writePointer(uint64_t Offset, const Pointer &Ptr, const DataLayout &DL);
+  ArrayRef<Byte> getBytes() const { return Bytes; }
+  MutableArrayRef<Byte> getBytes() { return Bytes; }
 
   void markAsFreed();
 };
@@ -126,6 +131,9 @@ class Context {
   uint32_t VScale = 4;
   uint32_t MaxSteps = 0;
   uint32_t MaxStackDepth = 256;
+  UndefValueBehavior UndefBehavior = UndefValueBehavior::NonDeterministic;
+
+  std::mt19937_64 Rng;
 
   // Memory
   uint64_t UsedMem = 0;
@@ -141,6 +149,10 @@ class Context {
   // precisely after we make ptrtoint have the implicit side-effect of exposing
   // the provenance.
   std::map<uint64_t, IntrusiveRefCntPtr<MemoryObject>> MemoryObjects;
+  AnyValue fromBytes(ArrayRef<Byte> Bytes, Type *Ty, uint32_t &OffsetInBits,
+                     bool CheckPaddingBits);
+  void toBytes(const AnyValue &Val, Type *Ty, uint32_t &OffsetInBits,
+               MutableArrayRef<Byte> Bytes, bool PaddingBits);
 
   // Constants
   // Use std::map to avoid iterator/reference invalidation.
@@ -171,6 +183,8 @@ class Context {
   uint32_t getVScale() const { return VScale; }
   uint32_t getMaxSteps() const { return MaxSteps; }
   uint32_t getMaxStackDepth() const { return MaxStackDepth; }
+  void setUndefValueBehavior(UndefValueBehavior UB) { UndefBehavior = UB; }
+  void reseed(uint32_t Seed) { Rng.seed(Seed); }
 
   LLVMContext &getContext() const { return Ctx; }
   const DataLayout &getDataLayout() const { return DL; }
@@ -180,6 +194,8 @@ class Context {
       return VScale * EC.getKnownMinValue();
     return EC.getFixedValue();
   }
+  uint64_t getEffectiveTypeAllocSize(Type *Ty);
+  uint64_t getEffectiveTypeStoreSize(Type *Ty);
 
   const AnyValue &getConstantValue(Constant *C);
   IntrusiveRefCntPtr<MemoryObject> allocate(uint64_t Size, uint64_t Align,
@@ -189,6 +205,18 @@ class Context {
   /// Derive a pointer from a memory object with offset 0.
   /// Please use Pointer's interface for further manipulations.
   Pointer deriveFromMemoryObject(IntrusiveRefCntPtr<MemoryObject> Obj);
+  /// Convert a byte sequence to a value of the given type. Uninitialized
+  /// bits are resolved according to the configured undef value behavior.
+  AnyValue fromBytes(ArrayRef<Byte> Bytes, Type *Ty);
+  /// Convert a value to a byte sequence. Scalar padding bits are zeroed.
+  void toBytes(const AnyValue &Val, Type *Ty, MutableArrayRef<Byte> Bytes);
+  /// Direct memory load without checks.
+  AnyValue load(MemoryObject &MO, uint64_t Offset, Type *ValTy);
+  /// Direct memory store without checks.
+  void store(MemoryObject &MO, uint64_t Offset, const AnyValue &Val,
+             Type *ValTy);
+  void storeRawBytes(MemoryObject &MO, uint64_t Offset, const void *Data,
+                     uint64_t Size);
 
   Function *getTargetFunction(const Pointer &Ptr);
   BasicBlock *getTargetBlock(const Pointer &Ptr);
diff --git a/llvm/tools/llubi/lib/Interpreter.cpp b/llvm/tools/llubi/lib/Interpreter.cpp
index 7961f7551cf48..59b5667c6d63c 100644
--- a/llvm/tools/llubi/lib/Interpreter.cpp
+++ b/llvm/tools/llubi/lib/Interpreter.cpp
@@ -16,10 +16,13 @@
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/InstVisitor.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/Allocator.h"
 
 namespace llvm::ubi {
 
+using namespace PatternMatch;
+
 enum class FrameState {
   // It is about to enter the function.
   // Valid transition:
@@ -248,6 +251,84 @@ class InstExecutor : public InstVisitor<InstExecutor, void> {
     return false;
   }
 
+  /// Check if the upcoming memory access is valid. Returns the offset relative
+  /// to the underlying object if it is valid.
+  std::optional<uint64_t> verifyMemAccess(const MemoryObject &MO,
+                                          const APInt &Address,
+                                          uint64_t AccessSize,
+                                          uint64_t Alignment, bool IsStore) {
+    // Loading from a stack object outside its lifetime is not undefined
+    // behavior and returns a poison value instead. Storing to it is still
+    // undefined behavior.
+    if (IsStore ? MO.getState() != MemoryObjectState::Alive : MO.getState() == MemoryObjectState::Freed) {
+      reportImmediateUB("Try to access a dead memory object.");
+      return std::nullopt;
+    }
+
+    assert(isPowerOf2_64(Alignment) && "Alignment should be a power of 2.");
+    if (Address.countr_zero() < Log2_64(Alignment)) {
+      reportImmediateUB("Misaligned memory access.");
+      return std::nullopt;
+    }
+
+    if (AccessSize > MO.getSize() || Address.ult(MO.getAddress())) {
+      reportImmediateUB("Memory access is out of bounds.");
+      return std::nullopt;
+    }
+
+    APInt Offset = Address - MO.getAddress();
+
+    if (Offset.ugt(MO.getSize() - AccessSize)) {
+      reportImmediateUB("Memory access is out of bounds.");
+      return std::nullopt;
+    }
+
+    return Offset.getZExtValue();
+  }
+
+  AnyValue load(const AnyValue &Ptr, uint64_t Align, Type *ValTy) {
+    if (Ptr.isPoison()) {
+      reportImmediateUB("Invalid memory access with a poison pointer.");
+      return AnyValue::getPoisonValue(Ctx, ValTy);
+    }
+    auto &PtrVal = Ptr.asPointer();
+    auto *MO = PtrVal.getMemoryObject();
+    if (!MO) {
+      reportImmediateUB("Invalid memory access via a pointer with nullary provenance.");
+      return AnyValue::getPoisonValue(Ctx, ValTy);
+    }
+    // TODO: pointer capability check
+    if (auto Offset = verifyMemAccess(
+            *MO, PtrVal.address(), Ctx.getEffectiveTypeStoreSize(ValTy), Align,
+            /*IsStore=*/false)) {
+      // Loading from a dead stack object yields a poison value.
+      if (MO->getState() == MemoryObjectState::Dead)
+        return AnyValue::getPoisonValue(Ctx, ValTy);
+
+      return Ctx.load(*MO, *Offset, ValTy);
+    }
+    return AnyValue::getPoisonValue(Ctx, ValTy);
+  }
+
+  void store(const AnyValue &Ptr, uint64_t Align, const AnyValue &Val,
+             Type *ValTy) {
+    if (Ptr.isPoison()) {
+      reportImmediateUB("Invalid memory access with a poison pointer.");
+      return;
+    }
+    auto &PtrVal = Ptr.asPointer();
+    auto *MO = PtrVal.getMemoryObject();
+    if (!MO) {
+      reportImmediateUB("Invalid memory access via a pointer with nullary provenance.");
+      return;
+    }
+    // TODO: pointer capability check
+    if (auto Offset = verifyMemAccess(
+            *MO, PtrVal.address(), Ctx.getEffectiveTypeStoreSize(ValTy), Align,
+            /*IsStore=*/true))
+      Ctx.store(*MO, *Offset, Val, ValTy);
+  }
+
   AnyValue computePtrAdd(const Pointer &Ptr, const APInt &Offset,
                          GEPNoWrapFlags Flags, AnyValue &AccumulatedOffset) {
     if (Offset.isZero())
@@ -438,6 +519,23 @@ class InstExecutor : public InstVisitor<InstExecutor, void> {
       }
       // TODO: handle llvm.assume with operand bundles
       return AnyValue();
+    case Intrinsic::lifetime_start:
+    case Intrinsic::lifetime_end: {
+      auto *Ptr = CB.getArgOperand(0);
+      if (isa<PoisonValue>(Ptr))
+        return AnyValue();
+      auto *MO = getValue(Ptr).asPointer().getMemoryObject();
+      assert(MO && "Memory object accessed by lifetime intrinsic should be "
+                   "always valid.");
+      if (IID == Intrinsic::lifetime_start) {
+        MO->setState(MemoryObjectState::Alive);
+        fill(MO->getBytes(), Byte::undef());
+      } else {
+        MO->setState(MemoryObjectState::Dead);
+        fill(MO->getBytes(), Byte::poison());
+      }
+      return AnyValue();
+    }
     default:
       Handler.onUnrecognizedInstruction(CB);
       Status = false;
@@ -799,8 +897,7 @@ class InstExecutor : public InstVisitor<InstExecutor, void> {
   }
 
   void visitAllocaInst(AllocaInst &AI) {
-    uint64_t AllocSize =
-        DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue();
+    uint64_t AllocSize = Ctx.getEffectiveTypeAllocSize(AI.getAllocatedType());
     if (AI.isArrayAllocation()) {
       auto &Size = getValue(AI.getArraySize());
       if (Size.isPoison()) {
@@ -821,10 +918,14 @@ class InstExecutor : public InstVisitor<InstExecutor, void> {
         return;
       }
     }
-    // FIXME: If it is used by llvm.lifetime.start, it should be initially dead.
+    // If it is used by llvm.lifetime.start, it should be initially dead.
+    bool IsInitiallyDead = any_of(AI.users(), [](User *U) {
+      return match(U, m_Intrinsic<Intrinsic::lifetime_start>());
+    });
     auto Obj = Ctx.allocate(AllocSize, AI.getPointerAlignment(DL).value(),
                             AI.getName(), AI.getAddressSpace(),
-                            MemInitKind::Uninitialized);
+                            IsInitiallyDead ? MemInitKind::Poisoned
+                                            : MemInitKind::Uninitialized);
     if (!Obj) {
       reportError("Insufficient stack space.");
       return;
@@ -920,6 +1021,24 @@ class InstExecutor : public InstVisitor<InstExecutor, void> {
     });
   }
 
+  void visitLoadInst(LoadInst &LI) {
+    auto RetVal = load(getValue(LI.getPointerOperand()), LI.getAlign().value(),
+                       LI.getType());
+    // TODO: track volatile loads
+    // TODO: handle metadata
+    setResult(LI, std::move(RetVal));
+  }
+
+  void visitStoreInst(StoreInst &SI) {
+    auto &Ptr = getValue(SI.getPointerOperand());
+    auto &Val = getValue(SI.getValueOperand());
+    // TODO: track volatile stores
+    // TODO: handle metadata
+    store(Ptr, SI.getAlign().value(), Val, SI.getValueOperand()->getType());
+    if (Status)
+      Status &= Handler.onInstructionExecuted(SI, AnyValue());
+  }
+
   void visitInstruction(Instruction &I) {
     Handler.onUnrecognizedInstruction(I);
     Status = false;
diff --git a/llvm/tools/llubi/lib/Value.h b/llvm/tools/llubi/lib/Value.h
index 6add6e7f06304..b6ce06af19f54 100644
--- a/llvm/tools/llubi/lib/Value.h
+++ b/llvm/tools/llubi/lib/Value.h
@@ -21,26 +21,55 @@ class MemoryObject;
 class Context;
 class AnyValue;
 
-enum class ByteKind : uint8_t {
-  // A concrete byte with a known value.
-  Concrete,
-  // A uninitialized byte. Each load from an uninitialized byte yields
-  // a nondeterministic value.
-  Undef,
-  // A poisoned byte. It occurs when the program stores a poison value to
-  // memory,
-  // or when a memory object is dead.
-  Poison,
-};
-
+/// Representation of a byte in memory.
+/// How to interpret the byte per bit:
+/// - If the concrete mask bit is 0, the bit is either undef or poison. The
+/// value bit indicates whether it is undef.
+/// - If the concrete mask bit is 1, the bit is a concrete value. The value bit
+/// stores the concrete bit value.
 struct Byte {
+  uint8_t ConcreteMask;
   uint8_t Value;
-  ByteKind Kind : 2;
-  // TODO: provenance
+  // TODO: captured capabilities of pointers.
+
+  static Byte poison() { return Byte{0, 0}; }
+  static Byte undef() { return Byte{0, 255}; }
+  static Byte concrete(uint8_t Val) { return Byte{255, Val}; }
+
+  void zeroBits(uint8_t Mask) {
+    ConcreteMask |= Mask;
+    Value &= ~Mask;
+  }
+
+  void poisonBits(uint8_t Mask) {
+    ConcreteMask &= ~Mask;
+    Value &= ~Mask;
+  }
+
+  void undefBits(uint8_t Mask) {
+    ConcreteMask &= ~Mask;
+    Value |= Mask;
+  }
+
+  void writeBits(uint8_t Mask, uint8_t Val) {
+    ConcreteMask |= Mask;
+    Value = (Value & ~Mask) | (Val & Mask);
+  }
+
+  /// Returns a logical byte extracted from two adjacent bytes.
+  /// Example with ShAmt = 5:
+  ///     |      Byte0      |     Byte1       |
+  /// LSB | 0 1 0 1 0 1 0 1 | 0 0 0 0 1 1 1 1 | MSB
+  ///     Result =  | 1 0 1   0 0 0 0 1 |
+  Byte fshr(const Byte &High, uint32_t ShAmt) const {
+    return Byte{static_cast<uint8_t>(
+                    (ConcreteMask | (High.ConcreteMask << 8)) >> ShAmt),
+                static_cast<uint8_t>((Value | (High.Value << 8)) >> ShAmt)};
+  }
 
-  void set(uint8_t V) {
-    Value = V;
-    Kind = ByteKind::Concrete;
+  Byte lshr(uint8_t Shift) const {
+    return Byte{static_cast<uint8_t>(ConcreteMask >> Shift),
+                static_cast<uint8_t>(Value >> Shift)};
   }
 };
 
diff --git a/llvm/tools/llubi/llubi.cpp b/llvm/tools/llubi/llubi.cpp
index 1d2d4dc050b5d..de76a7e64c27b 100644
--- a/llvm/tools/llubi/llubi.cpp
+++ b/llvm/tools/llubi/llubi.cpp
@@ -74,6 +74,19 @@ static cl::opt<unsigned>
     VScale("vscale", cl::desc("The value of llvm.vscale (default = 4)"),
            cl::value_desc("N"), cl::init(4), cl::cat(InterpreterCategory));
 
+static cl::opt<unsigned>
+    Seed("seed",
+         cl::desc("Random seed for non-deterministic behavior (default = 0)"),
+         cl::value_desc("N"), cl::init(0), cl::cat(InterpreterCategory));
+
+cl::opt<ubi::UndefValueBehavior> UndefBehavior(
+    "", cl::desc("Choose undef value behavior:"),
+    cl::values(clEnumVal(ubi::UndefValueBehavior::NonDeterministic,
+                         "Each load of an uninitialized byte yields a freshly "
+                         "random value."),
+               clEnumVal(ubi::UndefValueBehavior::Zero,
+                         "All uses of an uninitialized byte yield zero.")));
+
 class VerboseEventHandler : public ubi::EventHandler {
 public:
   bool onInstructionExecuted(Instruction &I,
@@ -164,6 +177,8 @@ int main(int argc, char **argv) {
   Ctx.setVScale(VScale);
   Ctx.setMaxSteps(MaxSteps);
   Ctx.setMaxStackDepth(MaxStackDepth);
+  Ctx.setUndefValueBehavior(UndefBehavior);
+  Ctx.reseed(Seed);
 
   if (!Ctx.initGlobalValues()) {
     WithColor::error() << "Failed to initialize global values (e.g., the "
@@ -182,8 +197,8 @@ int main(int argc, char **argv) {
   }
   TargetLibraryInfo TLI(Ctx.getTLIImpl());
   Type *IntTy = IntegerType::get(Ctx.getContext(), TLI.getIntSize());
-  auto *MainFuncTy = FunctionType::get(
-      IntTy, {IntTy, PointerType::getUnqual(Ctx.getContext())}, false);
+  Type *PtrTy = PointerType::getUnqual(Ctx.getContext());
+  auto *MainFuncTy = FunctionType::get(IntTy, {IntTy, PtrTy}, false);
   SmallVector<ubi::AnyValue> Args;
   if (EntryFn->getFunctionType() == MainFuncTy) {
     Args.push_back(
@@ -206,8 +221,8 @@ int main(int argc, char **argv) {
         return 1;
       }
       ubi::Pointer ArgPtr = Ctx.deriveFromMemoryObject(ArgvStrMem);
-      ArgvStrMem->writeRawBytes(0, Arg.c_str(), Arg.length());
-      ArgvPtrsMem->writePointer(Idx * PtrSize, ArgPtr, Ctx.getDataLayout());
+      Ctx.storeRawBytes(*ArgvStrMem, 0, Arg.c_str(), Arg.length());
+      Ctx.store(*ArgvPtrsMem, Idx * PtrSize, ArgPtr, PtrTy);
     }
     Args.push_back(Ctx.deriveFromMemoryObject(ArgvPtrsMem));
   } else if (!EntryFn->arg_empty()) {

>From c1d67b96883394d3b0b10928d1db372b7a401ce8 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sat, 21 Feb 2026 00:30:32 +0800
Subject: [PATCH 2/6] [llubi] Update offset after loading poison bits

---
 llvm/test/tools/llubi/loadstore_be.ll | 9 +++++++++
 llvm/test/tools/llubi/loadstore_le.ll | 9 +++++++++
 llvm/tools/llubi/lib/Context.cpp      | 6 ++++--
 llvm/tools/llubi/lib/Interpreter.cpp  | 9 ++++++---
 llvm/tools/llubi/lib/Value.h          | 8 ++++----
 5 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/llvm/test/tools/llubi/loadstore_be.ll b/llvm/test/tools/llubi/loadstore_be.ll
index 0b78536cd808f..4e194a9703356 100644
--- a/llvm/test/tools/llubi/loadstore_be.ll
+++ b/llvm/test/tools/llubi/loadstore_be.ll
@@ -86,6 +86,11 @@ define void @main() {
   store [2 x i32] [i32 1, i32 2], ptr %alloc_array
   %val26 = load [2 x i32], ptr %alloc_array
 
+  %alloc_i1_vec = alloca <4 x i1>
+  store <4 x i1> <i1 1, i1 0, i1 poison, i1 0>, ptr %alloc_i1_vec
+  %val27 = load <4 x i1>, ptr %alloc_i1_vec
+  %val28 = load i8, ptr %alloc_i1_vec
+
   ret void
 }
 ; CHECK: Entering function: main
@@ -149,5 +154,9 @@ define void @main() {
 ; CHECK-NEXT:   %alloc_array = alloca [2 x i32], align 4 => ptr 0x70 [alloc_array]
 ; CHECK-NEXT:   store [2 x i32] [i32 1, i32 2], ptr %alloc_array, align 4
 ; CHECK-NEXT:   %val26 = load [2 x i32], ptr %alloc_array, align 4 => { i32 1, i32 2 }
+; CHECK-NEXT:   %alloc_i1_vec = alloca <4 x i1>, align 1 => ptr 0x78 [alloc_i1_vec]
+; CHECK-NEXT:   store <4 x i1> <i1 true, i1 false, i1 poison, i1 false>, ptr %alloc_i1_vec, align 1
+; CHECK-NEXT:   %val27 = load <4 x i1>, ptr %alloc_i1_vec, align 1 => { T, F, poison, F }
+; CHECK-NEXT:   %val28 = load i8, ptr %alloc_i1_vec, align 1 => poison
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: Exiting function: main
diff --git a/llvm/test/tools/llubi/loadstore_le.ll b/llvm/test/tools/llubi/loadstore_le.ll
index 62caea6aceacc..d54a54fc8d3f2 100644
--- a/llvm/test/tools/llubi/loadstore_le.ll
+++ b/llvm/test/tools/llubi/loadstore_le.ll
@@ -87,6 +87,11 @@ define void @main() {
   store [2 x i32] [i32 1, i32 2], ptr %alloc_array
   %val26 = load [2 x i32], ptr %alloc_array
 
+  %alloc_i1_vec = alloca <4 x i1>
+  store <4 x i1> <i1 1, i1 0, i1 poison, i1 0>, ptr %alloc_i1_vec
+  %val27 = load <4 x i1>, ptr %alloc_i1_vec
+  %val28 = load i8, ptr %alloc_i1_vec
+
   ret void
 }
 ; CHECK: Entering function: main
@@ -151,5 +156,9 @@ define void @main() {
 ; CHECK-NEXT:   %alloc_array = alloca [2 x i32], align 4 => ptr 0x70 [alloc_array]
 ; CHECK-NEXT:   store [2 x i32] [i32 1, i32 2], ptr %alloc_array, align 4
 ; CHECK-NEXT:   %val26 = load [2 x i32], ptr %alloc_array, align 4 => { i32 1, i32 2 }
+; CHECK-NEXT:   %alloc_i1_vec = alloca <4 x i1>, align 1 => ptr 0x78 [alloc_i1_vec]
+; CHECK-NEXT:   store <4 x i1> <i1 true, i1 false, i1 poison, i1 false>, ptr %alloc_i1_vec, align 1
+; CHECK-NEXT:   %val27 = load <4 x i1>, ptr %alloc_i1_vec, align 1 => { T, F, poison, F }
+; CHECK-NEXT:   %val28 = load i8, ptr %alloc_i1_vec, align 1 => poison
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: Exiting function: main
diff --git a/llvm/tools/llubi/lib/Context.cpp b/llvm/tools/llubi/lib/Context.cpp
index 1e7b0dd723dc2..631826fd76143 100644
--- a/llvm/tools/llubi/lib/Context.cpp
+++ b/llvm/tools/llubi/lib/Context.cpp
@@ -124,12 +124,14 @@ AnyValue Context::fromBytes(ArrayRef<Byte> Bytes, Type *Ty,
         LogicalByte = Bytes[BitsStart / 8].lshr(BitsStart % 8);
       else
         LogicalByte =
-            Bytes[BitsStart / 8].fshr(Bytes[BitsEnd / 8], BitsStart % 8);
+            Byte::fshr(Bytes[BitsStart / 8], Bytes[BitsEnd / 8], BitsStart % 8);
 
       uint32_t Mask = (1U << NumBitsInByte) - 1;
       // If any of the bits in the byte is poison, the whole value is poison.
-      if (~LogicalByte.ConcreteMask & ~LogicalByte.Value & Mask)
+      if (~LogicalByte.ConcreteMask & ~LogicalByte.Value & Mask) {
+        OffsetInBits = NewOffsetInBits;
         return AnyValue::poison();
+      }
       uint8_t RandomBits = 0;
       if (UndefBehavior == UndefValueBehavior::NonDeterministic &&
           (~LogicalByte.ConcreteMask & Mask)) {
diff --git a/llvm/tools/llubi/lib/Interpreter.cpp b/llvm/tools/llubi/lib/Interpreter.cpp
index 59b5667c6d63c..19493c06163a8 100644
--- a/llvm/tools/llubi/lib/Interpreter.cpp
+++ b/llvm/tools/llubi/lib/Interpreter.cpp
@@ -260,7 +260,8 @@ class InstExecutor : public InstVisitor<InstExecutor, void> {
     // Loading from a stack object outside its lifetime is not undefined
     // behavior and returns a poison value instead. Storing to it is still
     // undefined behavior.
-    if (IsStore ? MO.getState() != MemoryObjectState::Alive : MO.getState() == MemoryObjectState::Freed) {
+    if (IsStore ? MO.getState() != MemoryObjectState::Alive
+                : MO.getState() == MemoryObjectState::Freed) {
       reportImmediateUB("Try to access a dead memory object.");
       return std::nullopt;
     }
@@ -294,7 +295,8 @@ class InstExecutor : public InstVisitor<InstExecutor, void> {
     auto &PtrVal = Ptr.asPointer();
     auto *MO = PtrVal.getMemoryObject();
     if (!MO) {
-      reportImmediateUB("Invalid memory access via a pointer with nullary provenance.");
+      reportImmediateUB(
+          "Invalid memory access via a pointer with nullary provenance.");
       return AnyValue::getPoisonValue(Ctx, ValTy);
     }
     // TODO: pointer capability check
@@ -319,7 +321,8 @@ class InstExecutor : public InstVisitor<InstExecutor, void> {
     auto &PtrVal = Ptr.asPointer();
     auto *MO = PtrVal.getMemoryObject();
     if (!MO) {
-      reportImmediateUB("Invalid memory access via a pointer with nullary provenance.");
+      reportImmediateUB(
+          "Invalid memory access via a pointer with nullary provenance.");
       return;
     }
     // TODO: pointer capability check
diff --git a/llvm/tools/llubi/lib/Value.h b/llvm/tools/llubi/lib/Value.h
index b6ce06af19f54..b4686160ea8b8 100644
--- a/llvm/tools/llubi/lib/Value.h
+++ b/llvm/tools/llubi/lib/Value.h
@@ -58,13 +58,13 @@ struct Byte {
 
   /// Returns a logical byte that is part of two adjacent bytes.
   /// Example with ShAmt = 5:
-  ///     |      Byte0      |     Byte1       |
+  ///     |       Low       |      High       |
   /// LSB | 0 1 0 1 0 1 0 1 | 0 0 0 0 1 1 1 1 | MSB
   ///     Result =  | 1 0 1   0 0 0 0 1 |
-  Byte fshr(const Byte &High, uint32_t ShAmt) const {
+  static Byte fshr(const Byte &Low, const Byte &High, uint32_t ShAmt) {
     return Byte{static_cast<uint8_t>(
-                    (ConcreteMask | (High.ConcreteMask << 8)) >> ShAmt),
-                static_cast<uint8_t>((Value | (High.Value << 8)) >> ShAmt)};
+                    (Low.ConcreteMask | (High.ConcreteMask << 8)) >> ShAmt),
+                static_cast<uint8_t>((Low.Value | (High.Value << 8)) >> ShAmt)};
   }
 
   Byte lshr(uint8_t Shift) const {

>From 3f276f08e5b23373fd22532d8f84b0a7f9e4bf24 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sat, 21 Feb 2026 01:39:31 +0800
Subject: [PATCH 3/6] [llubi] Fix build error

---
 llvm/tools/llubi/lib/Context.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/tools/llubi/lib/Context.cpp b/llvm/tools/llubi/lib/Context.cpp
index 631826fd76143..46778949454e6 100644
--- a/llvm/tools/llubi/lib/Context.cpp
+++ b/llvm/tools/llubi/lib/Context.cpp
@@ -136,8 +136,8 @@ AnyValue Context::fromBytes(ArrayRef<Byte> Bytes, Type *Ty,
       if (UndefBehavior == UndefValueBehavior::NonDeterministic &&
           (~LogicalByte.ConcreteMask & Mask)) {
         // This byte contains undef bits.
-        std::uniform_int_distribution<uint8_t> Distrib;
-        RandomBits = Distrib(Rng);
+        std::uniform_int_distribution<uint32_t> Distrib(0, 255);
+        RandomBits = static_cast<uint8_t>(Distrib(Rng));
       }
       uint8_t ActualBits = ((LogicalByte.Value & LogicalByte.ConcreteMask) |
                             (RandomBits & ~LogicalByte.ConcreteMask)) &

>From c495ac29200d3c2447bea4b48065797507e6f5c2 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Thu, 5 Mar 2026 05:33:12 +0800
Subject: [PATCH 4/6] [llubi] Address review comments.

---
 llvm/test/tools/llubi/loadstore_be.ll | 32 +++++++++++---
 llvm/test/tools/llubi/loadstore_le.ll | 30 ++++++++++---
 llvm/tools/llubi/lib/Context.cpp      | 63 +++++++++++++++++----------
 llvm/tools/llubi/lib/Context.h        | 14 +++++-
 llvm/tools/llubi/lib/Interpreter.cpp  | 36 ++++++++-------
 5 files changed, 117 insertions(+), 58 deletions(-)

diff --git a/llvm/test/tools/llubi/loadstore_be.ll b/llvm/test/tools/llubi/loadstore_be.ll
index 4e194a9703356..6e6d738b002ca 100644
--- a/llvm/test/tools/llubi/loadstore_be.ll
+++ b/llvm/test/tools/llubi/loadstore_be.ll
@@ -91,6 +91,16 @@ define void @main() {
   %val27 = load <4 x i1>, ptr %alloc_i1_vec
   %val28 = load i8, ptr %alloc_i1_vec
 
+  %alloc_padding = alloca i31
+  store i32 0, ptr %alloc_padding
+
+  %alloc_padding_vec = alloca i64
+  store { <6 x i5>, i32 } { <6 x i5> zeroinitializer, i32 -1}, ptr %alloc_padding_vec
+  %load_agg = load { <6 x i5>, i32 }, ptr %alloc_padding_vec
+  %load_vec = load <6 x i5>, ptr %alloc_padding_vec
+  %load_int_non_zero_padding = load i33, ptr %alloc_padding_vec
+  %load_vec_non_zero_padding = load <3 x i11>, ptr %alloc_padding_vec
+
   ret void
 }
 ; CHECK: Entering function: main
@@ -107,8 +117,8 @@ define void @main() {
 ; CHECK-NEXT:   %val6 = load <4 x i8>, ptr %alloc, align 4 => { i8 7, i8 6, i8 9, i8 8 }
 ; CHECK-NEXT:   %val7 = load <8 x i4>, ptr %alloc, align 4 => { i4 0, i4 7, i4 0, i4 6, i4 0, i4 -7, i4 0, i4 -8 }
 ; CHECK-NEXT:   store <3 x i3> <i3 1, i3 2, i3 3>, ptr %alloc, align 2
-; CHECK-NEXT:   %val8 = load <16 x i1>, ptr %alloc, align 2 => { T, F, F, F, F, T, F, F, F, T, F, T, F, F, T, T }
-; CHECK-NEXT:   %val9 = load <16 x i1>, ptr %alloc, align 2 => { F, F, T, F, F, T, F, F, F, T, F, T, F, F, T, T }
+; CHECK-NEXT:   %val8 = load <16 x i1>, ptr %alloc, align 2 => { F, F, F, F, F, F, F, F, F, T, F, T, F, F, T, T }
+; CHECK-NEXT:   %val9 = load <16 x i1>, ptr %alloc, align 2 => { F, F, F, F, F, F, F, F, F, T, F, T, F, F, T, T }
 ; CHECK-NEXT:   store <8 x i3> <i3 0, i3 1, i3 2, i3 3, i3 -4, i3 -3, i3 -2, i3 -1>, ptr %alloc, align 4
 ; CHECK-NEXT:   %val_bitcast = load <3 x i8>, ptr %alloc, align 4 => { i8 5, i8 57, i8 119 }
 ; CHECK-NEXT:   store i25 -1, ptr %alloc, align 4
@@ -120,8 +130,8 @@ define void @main() {
 ; CHECK-NEXT:   %alloc_lifetime = alloca i32, align 4 => ptr 0xC [alloc_lifetime]
 ; CHECK-NEXT:   %val12 = load i32, ptr %alloc_lifetime, align 4 => poison
 ; CHECK-NEXT:   call void @llvm.lifetime.start.p0(ptr %alloc_lifetime)
-; CHECK-NEXT:   %val13 = load i32, ptr %alloc_lifetime, align 4 => i32 -1295355583
-; CHECK-NEXT:   %val14 = load i32, ptr %alloc_lifetime, align 4 => i32 -1809495666
+; CHECK-NEXT:   %val13 = load i32, ptr %alloc_lifetime, align 4 => i32 -1744110296
+; CHECK-NEXT:   %val14 = load i32, ptr %alloc_lifetime, align 4 => i32 1822494346
 ; CHECK-NEXT:   store i32 77, ptr %alloc_lifetime, align 4
 ; CHECK-NEXT:   %val15 = load i32, ptr %alloc_lifetime, align 4 => i32 77
 ; CHECK-NEXT:   call void @llvm.lifetime.end.p0(ptr %alloc_lifetime)
@@ -139,8 +149,8 @@ define void @main() {
 ; CHECK-NEXT:   %alloc_struct = alloca %struct, align 8 => ptr 0x30 [alloc_struct]
 ; CHECK-NEXT:   store %struct { [2 x i16] [i16 1, i16 2], i64 3 }, ptr %alloc_struct, align 8
 ; CHECK-NEXT:   %val19 = load %struct, ptr %alloc_struct, align 8 => { { i16 1, i16 2 }, i64 3 }
-; CHECK-NEXT:   %val20 = load i64, ptr %alloc_struct, align 8 => i64 281483653031312
-; CHECK-NEXT:   %val21 = load i64, ptr %alloc_struct, align 8 => i64 281487549378445
+; CHECK-NEXT:   %val20 = load i64, ptr %alloc_struct, align 8 => i64 281486375577815
+; CHECK-NEXT:   %val21 = load i64, ptr %alloc_struct, align 8 => i64 281485466753262
 ; CHECK-NEXT:   %alloc_struct_packed = alloca %struct.packed, align 8 => ptr 0x40 [alloc_struct_packed]
 ; CHECK-NEXT:   store %struct.packed <{ [2 x i16] [i16 1, i16 2], i64 3 }>, ptr %alloc_struct_packed, align 1
 ; CHECK-NEXT:   %val22 = load %struct.packed, ptr %alloc_struct_packed, align 1 => { { i16 1, i16 2 }, i64 3 }
@@ -156,7 +166,15 @@ define void @main() {
 ; CHECK-NEXT:   %val26 = load [2 x i32], ptr %alloc_array, align 4 => { i32 1, i32 2 }
 ; CHECK-NEXT:   %alloc_i1_vec = alloca <4 x i1>, align 1 => ptr 0x78 [alloc_i1_vec]
 ; CHECK-NEXT:   store <4 x i1> <i1 true, i1 false, i1 poison, i1 false>, ptr %alloc_i1_vec, align 1
-; CHECK-NEXT:   %val27 = load <4 x i1>, ptr %alloc_i1_vec, align 1 => { T, F, poison, F }
+; CHECK-NEXT:   %val27 = load <4 x i1>, ptr %alloc_i1_vec, align 1 => { F, F, F, F }
 ; CHECK-NEXT:   %val28 = load i8, ptr %alloc_i1_vec, align 1 => poison
+; CHECK-NEXT:   %alloc_padding = alloca i31, align 4 => ptr 0x7C [alloc_padding]
+; CHECK-NEXT:   store i32 0, ptr %alloc_padding, align 4
+; CHECK-NEXT:   %alloc_padding_vec = alloca i64, align 8 => ptr 0x80 [alloc_padding_vec]
+; CHECK-NEXT:   store { <6 x i5>, i32 } { <6 x i5> zeroinitializer, i32 -1 }, ptr %alloc_padding_vec, align 4
+; CHECK-NEXT:   %load_agg = load { <6 x i5>, i32 }, ptr %alloc_padding_vec, align 4 => { { i5 0, i5 0, i5 0, i5 0, i5 0, i5 0 }, i32 -1 }
+; CHECK-NEXT:   %load_vec = load <6 x i5>, ptr %alloc_padding_vec, align 4 => { i5 0, i5 0, i5 0, i5 0, i5 0, i5 0 }
+; CHECK-NEXT:   %load_int_non_zero_padding = load i33, ptr %alloc_padding_vec, align 8 => i33 255
+; CHECK-NEXT:   %load_vec_non_zero_padding = load <3 x i11>, ptr %alloc_padding_vec, align 8 => { i11 255, i11 0, i11 0 }
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: Exiting function: main
diff --git a/llvm/test/tools/llubi/loadstore_le.ll b/llvm/test/tools/llubi/loadstore_le.ll
index d54a54fc8d3f2..c876337d3f2a7 100644
--- a/llvm/test/tools/llubi/loadstore_le.ll
+++ b/llvm/test/tools/llubi/loadstore_le.ll
@@ -92,6 +92,16 @@ define void @main() {
   %val27 = load <4 x i1>, ptr %alloc_i1_vec
   %val28 = load i8, ptr %alloc_i1_vec
 
+  %alloc_padding = alloca i31
+  store i32 0, ptr %alloc_padding
+
+  %alloc_padding_vec = alloca i64
+  store { <6 x i5>, i32 } { <6 x i5> zeroinitializer, i32 -1}, ptr %alloc_padding_vec
+  %load_agg = load { <6 x i5>, i32 }, ptr %alloc_padding_vec
+  %load_vec = load <6 x i5>, ptr %alloc_padding_vec
+  %load_int_non_zero_padding = load i33, ptr %alloc_padding_vec
+  %load_vec_non_zero_padding = load <3 x i11>, ptr %alloc_padding_vec
+
   ret void
 }
 ; CHECK: Entering function: main
@@ -108,8 +118,8 @@ define void @main() {
 ; CHECK-NEXT:   %val6 = load <4 x i8>, ptr %alloc, align 4 => { i8 7, i8 6, i8 9, i8 8 }
 ; CHECK-NEXT:   %val7 = load <8 x i4>, ptr %alloc, align 4 => { i4 7, i4 0, i4 6, i4 0, i4 -7, i4 0, i4 -8, i4 0 }
 ; CHECK-NEXT:   store <3 x i3> <i3 1, i3 2, i3 3>, ptr %alloc, align 2
-; CHECK-NEXT:   %val8 = load <16 x i1>, ptr %alloc, align 2 => { T, F, F, F, T, F, T, T, F, F, T, F, F, F, F, T }
-; CHECK-NEXT:   %val9 = load <16 x i1>, ptr %alloc, align 2 => { T, F, F, F, T, F, T, T, F, F, T, F, F, T, F, F }
+; CHECK-NEXT:   %val8 = load <16 x i1>, ptr %alloc, align 2 => { T, F, F, F, T, F, T, T, F, F, F, F, F, F, F, F }
+; CHECK-NEXT:   %val9 = load <16 x i1>, ptr %alloc, align 2 => { T, F, F, F, T, F, T, T, F, F, F, F, F, F, F, F }
 ; CHECK-NEXT:   store <8 x i3> <i3 0, i3 1, i3 2, i3 3, i3 -4, i3 -3, i3 -2, i3 -1>, ptr %alloc, align 4
 ; CHECK-NEXT:   %val_bitcast = load <3 x i8>, ptr %alloc, align 4 => { i8 -120, i8 -58, i8 -6 }
 ; CHECK-NEXT:   store i25 -1, ptr %alloc, align 4
@@ -122,8 +132,8 @@ define void @main() {
 ; CHECK-NEXT:   %alloc_lifetime = alloca i32, align 4 => ptr 0xC [alloc_lifetime]
 ; CHECK-NEXT:   %val12 = load i32, ptr %alloc_lifetime, align 4 => poison
 ; CHECK-NEXT:   call void @llvm.lifetime.start.p0(ptr %alloc_lifetime)
-; CHECK-NEXT:   %val13 = load i32, ptr %alloc_lifetime, align 4 => i32 -1295355583
-; CHECK-NEXT:   %val14 = load i32, ptr %alloc_lifetime, align 4 => i32 -1809495666
+; CHECK-NEXT:   %val13 = load i32, ptr %alloc_lifetime, align 4 => i32 -1744110296
+; CHECK-NEXT:   %val14 = load i32, ptr %alloc_lifetime, align 4 => i32 1822494346
 ; CHECK-NEXT:   store i32 77, ptr %alloc_lifetime, align 4
 ; CHECK-NEXT:   %val15 = load i32, ptr %alloc_lifetime, align 4 => i32 77
 ; CHECK-NEXT:   call void @llvm.lifetime.end.p0(ptr %alloc_lifetime)
@@ -141,8 +151,8 @@ define void @main() {
 ; CHECK-NEXT:   %alloc_struct = alloca %struct, align 8 => ptr 0x30 [alloc_struct]
 ; CHECK-NEXT:   store %struct { [2 x i16] [i16 1, i16 2], i64 3 }, ptr %alloc_struct, align 8
 ; CHECK-NEXT:   %val19 = load %struct, ptr %alloc_struct, align 8 => { { i16 1, i16 2 }, i64 3 }
-; CHECK-NEXT:   %val20 = load i64, ptr %alloc_struct, align 8 => i64 371025319710294017
-; CHECK-NEXT:   %val21 = load i64, ptr %alloc_struct, align 8 => i64 -1341035243900895231
+; CHECK-NEXT:   %val20 = load i64, ptr %alloc_struct, align 8 => i64 -6382470561775091711
+; CHECK-NEXT:   %val21 = load i64, ptr %alloc_struct, align 8 => i64 8160901778997641217
 ; CHECK-NEXT:   %alloc_struct_packed = alloca %struct.packed, align 8 => ptr 0x40 [alloc_struct_packed]
 ; CHECK-NEXT:   store %struct.packed <{ [2 x i16] [i16 1, i16 2], i64 3 }>, ptr %alloc_struct_packed, align 1
 ; CHECK-NEXT:   %val22 = load %struct.packed, ptr %alloc_struct_packed, align 1 => { { i16 1, i16 2 }, i64 3 }
@@ -160,5 +170,13 @@ define void @main() {
 ; CHECK-NEXT:   store <4 x i1> <i1 true, i1 false, i1 poison, i1 false>, ptr %alloc_i1_vec, align 1
 ; CHECK-NEXT:   %val27 = load <4 x i1>, ptr %alloc_i1_vec, align 1 => { T, F, poison, F }
 ; CHECK-NEXT:   %val28 = load i8, ptr %alloc_i1_vec, align 1 => poison
+; CHECK-NEXT:   %alloc_padding = alloca i31, align 4 => ptr 0x7C [alloc_padding]
+; CHECK-NEXT:   store i32 0, ptr %alloc_padding, align 4
+; CHECK-NEXT:   %alloc_padding_vec = alloca i64, align 8 => ptr 0x80 [alloc_padding_vec]
+; CHECK-NEXT:   store { <6 x i5>, i32 } { <6 x i5> zeroinitializer, i32 -1 }, ptr %alloc_padding_vec, align 4
+; CHECK-NEXT:   %load_agg = load { <6 x i5>, i32 }, ptr %alloc_padding_vec, align 4 => { { i5 0, i5 0, i5 0, i5 0, i5 0, i5 0 }, i32 -1 }
+; CHECK-NEXT:   %load_vec = load <6 x i5>, ptr %alloc_padding_vec, align 4 => { i5 0, i5 0, i5 0, i5 0, i5 0, i5 0 }
+; CHECK-NEXT:   %load_int_non_zero_padding = load i33, ptr %alloc_padding_vec, align 8 => poison
+; CHECK-NEXT:   %load_vec_non_zero_padding = load <3 x i11>, ptr %alloc_padding_vec, align 8 => { poison, poison, poison }
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: Exiting function: main
diff --git a/llvm/tools/llubi/lib/Context.cpp b/llvm/tools/llubi/lib/Context.cpp
index 46778949454e6..cfe96625b2c09 100644
--- a/llvm/tools/llubi/lib/Context.cpp
+++ b/llvm/tools/llubi/lib/Context.cpp
@@ -112,14 +112,17 @@ AnyValue Context::fromBytes(ArrayRef<Byte> Bytes, Type *Ty,
       NewOffsetInBits = alignTo(NewOffsetInBits, 8);
     bool NeedsPadding = NewOffsetInBits != OffsetInBits + NumBits;
     uint32_t NumBitsToExtract = NewOffsetInBits - OffsetInBits;
-    SmallVector<uint64_t> BitsData(alignTo(NumBitsToExtract, 8));
+    SmallVector<uint64_t> RawBits(alignTo(NumBitsToExtract, 8));
     for (uint32_t I = 0; I < NumBitsToExtract; I += 8) {
+      // Try to form a 'logical' byte that represents the bits in the range
+      // [BitsStart, BitsEnd].
       uint32_t NumBitsInByte = std::min(8U, NumBitsToExtract - I);
       uint32_t BitsStart =
           OffsetInBits +
           (DL.isLittleEndian() ? I : (NumBitsToExtract - NumBitsInByte - I));
       uint32_t BitsEnd = BitsStart + NumBitsInByte - 1;
       Byte LogicalByte;
+      // Check whether it is a cross-byte access.
       if (((BitsStart ^ BitsEnd) & ~7) == 0)
         LogicalByte = Bytes[BitsStart / 8].lshr(BitsStart % 8);
       else
@@ -142,11 +145,11 @@ AnyValue Context::fromBytes(ArrayRef<Byte> Bytes, Type *Ty,
       uint8_t ActualBits = ((LogicalByte.Value & LogicalByte.ConcreteMask) |
                             (RandomBits & ~LogicalByte.ConcreteMask)) &
                            Mask;
-      BitsData[I / 64] |= static_cast<APInt::WordType>(ActualBits) << (I % 64);
+      RawBits[I / 64] |= static_cast<APInt::WordType>(ActualBits) << (I % 64);
     }
     OffsetInBits = NewOffsetInBits;
 
-    APInt Bits(NumBitsToExtract, BitsData);
+    APInt Bits(NumBitsToExtract, RawBits);
 
     // Padding bits for non-byte-sized scalar types must be zero.
     if (NeedsPadding) {
@@ -167,14 +170,37 @@ AnyValue Context::fromBytes(ArrayRef<Byte> Bytes, Type *Ty,
   assert(OffsetInBits % 8 == 0 && "Missing padding bits.");
   if (auto *VecTy = dyn_cast<VectorType>(Ty)) {
     Type *ElemTy = VecTy->getElementType();
-    std::vector<AnyValue> ValVec;
+    uint32_t ElemBits = DL.getTypeSizeInBits(ElemTy).getFixedValue();
     uint32_t NumElements = getEVL(VecTy->getElementCount());
+    // Check padding bits. <N x iM> acts as if an integer type with N * M bits.
+    uint32_t NewOffsetInBits = OffsetInBits + ElemBits * NumElements;
+    uint32_t AlignedNewOffsetInBits = alignTo(NewOffsetInBits, 8);
+    if (NewOffsetInBits != AlignedNewOffsetInBits) {
+      assert(NewOffsetInBits % 8 != 0 &&
+             AlignedNewOffsetInBits - NewOffsetInBits < 8 &&
+             "Unexpected offset.");
+      // The padding bits are located in the last byte on little-endian systems.
+      // On big-endian systems, the padding bits are located in the first byte.
+      const Byte &PaddingByte =
+          Bytes[(DL.isBigEndian() ? OffsetInBits : NewOffsetInBits) / 8];
+      uint32_t Mask = (~0U << (NewOffsetInBits % 8)) & 255U;
+      // Make sure all high padding bits are zero.
+      if ((PaddingByte.ConcreteMask & ~PaddingByte.Value & Mask) != Mask) {
+        OffsetInBits = AlignedNewOffsetInBits;
+        return AnyValue::getPoisonValue(*this, Ty);
+      }
+      if (DL.isBigEndian())
+        OffsetInBits += AlignedNewOffsetInBits - NewOffsetInBits;
+    }
+
+    std::vector<AnyValue> ValVec;
     ValVec.reserve(NumElements);
     for (uint32_t I = 0; I != NumElements; ++I)
       ValVec.push_back(
           fromBytes(Bytes, ElemTy, OffsetInBits, /*CheckPaddingBits=*/false));
     if (DL.isBigEndian())
       std::reverse(ValVec.begin(), ValVec.end());
+    OffsetInBits = AlignedNewOffsetInBits;
     return AnyValue(std::move(ValVec));
   }
   if (auto *ArrTy = dyn_cast<ArrayType>(Ty)) {
@@ -196,11 +222,7 @@ AnyValue Context::fromBytes(ArrayRef<Byte> Bytes, Type *Ty,
     for (uint32_t I = 0; I != NumElements; ++I) {
       Type *ElemTy = StructTy->getElementType(I);
       TypeSize ElemOffset = Layout->getElementOffset(I);
-      OffsetInBits =
-          BaseOffsetInBits + (ElemOffset.isScalable()
-                                  ? ElemOffset.getKnownMinValue() * VScale
-                                  : ElemOffset.getFixedValue()) *
-                                 8;
+      OffsetInBits = BaseOffsetInBits + getEffectiveTypeSize(ElemOffset) * 8;
       ValVec.push_back(
           fromBytes(Bytes, ElemTy, OffsetInBits, /*CheckPaddingBits=*/true));
     }
@@ -234,7 +256,8 @@ void Context::toBytes(const AnyValue &Val, Type *Ty, uint32_t &OffsetInBits,
             static_cast<uint8_t>(((1U << NumBitsInByte) - 1)
                                  << (BitsStart % 8)),
             static_cast<uint8_t>(BitsVal << (BitsStart % 8)));
-        // Crosses the byte boundary.
+        // If it is a cross-byte access, write the remaining bits to the next
+        // byte.
         if (((BitsStart ^ BitsEnd) & ~7) != 0)
           Bytes[BitsEnd / 8].writeBits(
               static_cast<uint8_t>((1U << (BitsEnd % 8 + 1)) - 1),
@@ -291,8 +314,8 @@ void Context::toBytes(const AnyValue &Val, Type *Ty, uint32_t &OffsetInBits,
     if (NewOffsetInBits != OffsetInBits) {
       assert(OffsetInBits % 8 != 0 && NewOffsetInBits - OffsetInBits < 8 &&
              "Unexpected offset.");
-      // Fill remaining bits with undef.
-      Bytes[OffsetInBits / 8].undefBits(
+      // Fill remaining bits with zero.
+      Bytes[OffsetInBits / 8].zeroBits(
           static_cast<uint8_t>(~0U << (OffsetInBits % 8)));
     }
     OffsetInBits = NewOffsetInBits;
@@ -321,10 +344,7 @@ void Context::toBytes(const AnyValue &Val, Type *Ty, uint32_t &OffsetInBits,
       Type *ElemTy = StructTy->getElementType(I);
       TypeSize ElemOffset = Layout->getElementOffset(I);
       uint32_t NewOffsetInBits =
-          BaseOffsetInBits + (ElemOffset.isScalable()
-                                  ? ElemOffset.getKnownMinValue() * VScale
-                                  : ElemOffset.getFixedValue()) *
-                                 8;
+          BaseOffsetInBits + getEffectiveTypeSize(ElemOffset) * 8;
       FillUndefBytes(NewOffsetInBits);
       toBytes(Val.asAggregate()[I], ElemTy, OffsetInBits, Bytes,
               /*PaddingBits=*/true);
@@ -439,16 +459,11 @@ BasicBlock *Context::getTargetBlock(const Pointer &Ptr) {
 }
 
 uint64_t Context::getEffectiveTypeAllocSize(Type *Ty) {
-  TypeSize Size = DL.getTypeAllocSize(Ty);
-  if (Size.isScalable())
-    return Size.getKnownMinValue() * VScale;
-  return Size.getFixedValue();
+  // FIXME: It is incorrect for overaligned scalable vector types.
+  return getEffectiveTypeSize(DL.getTypeAllocSize(Ty));
 }
 uint64_t Context::getEffectiveTypeStoreSize(Type *Ty) {
-  TypeSize Size = DL.getTypeStoreSize(Ty);
-  if (Size.isScalable())
-    return Size.getKnownMinValue() * VScale;
-  return Size.getFixedValue();
+  return getEffectiveTypeSize(DL.getTypeStoreSize(Ty));
 }
 
 void MemoryObject::markAsFreed() {
diff --git a/llvm/tools/llubi/lib/Context.h b/llvm/tools/llubi/lib/Context.h
index 625b214391c8d..a250004b3cb54 100644
--- a/llvm/tools/llubi/lib/Context.h
+++ b/llvm/tools/llubi/lib/Context.h
@@ -189,11 +189,21 @@ class Context {
   LLVMContext &getContext() const { return Ctx; }
   const DataLayout &getDataLayout() const { return DL; }
   const TargetLibraryInfoImpl &getTLIImpl() const { return TLIImpl; }
+  /// Get the effective vector length for a vector type.
   uint32_t getEVL(ElementCount EC) const {
     if (EC.isScalable())
       return VScale * EC.getKnownMinValue();
     return EC.getFixedValue();
   }
+  /// The result is multiplied by VScale for scalable type sizes.
+  uint64_t getEffectiveTypeSize(TypeSize Size) const {
+    if (Size.isScalable())
+      return VScale * Size.getKnownMinValue();
+    return Size.getFixedValue();
+  }
+  /// Returns DL.getTypeAllocSize/getTypeStoreSize for the given type.
+  /// An exception to this is that for scalable vector types, the size is
+  /// computed as if the vector has getEVL(ElementCount) elements.
   uint64_t getEffectiveTypeAllocSize(Type *Ty);
   uint64_t getEffectiveTypeStoreSize(Type *Ty);
 
@@ -205,8 +215,8 @@ class Context {
   /// Derive a pointer from a memory object with offset 0.
   /// Please use Pointer's interface for further manipulations.
   Pointer deriveFromMemoryObject(IntrusiveRefCntPtr<MemoryObject> Obj);
-  /// Convert byte sequence to an value of the given type. Uninitialized bits
-  /// are flushed according to the options.
+  /// Convert byte sequence to a value of the given type. Uninitialized bits are
+  /// flushed according to the options.
   AnyValue fromBytes(ArrayRef<Byte> Bytes, Type *Ty);
   /// Convert a value to byte sequence. Padding bits are set to zero.
   void toBytes(const AnyValue &Val, Type *Ty, MutableArrayRef<Byte> Bytes);
diff --git a/llvm/tools/llubi/lib/Interpreter.cpp b/llvm/tools/llubi/lib/Interpreter.cpp
index 19493c06163a8..0e940fe502800 100644
--- a/llvm/tools/llubi/lib/Interpreter.cpp
+++ b/llvm/tools/llubi/lib/Interpreter.cpp
@@ -255,8 +255,8 @@ class InstExecutor : public InstVisitor<InstExecutor, void> {
   /// to the underlying object if it is valid.
   std::optional<uint64_t> verifyMemAccess(const MemoryObject &MO,
                                           const APInt &Address,
-                                          uint64_t AccessSize,
-                                          uint64_t Alignment, bool IsStore) {
+                                          uint64_t AccessSize, Align Alignment,
+                                          bool IsStore) {
     // Loading from a stack object outside its lifetime is not undefined
     // behavior and returns a poison value instead. Storing to it is still
     // undefined behavior.
@@ -266,8 +266,7 @@ class InstExecutor : public InstVisitor<InstExecutor, void> {
       return std::nullopt;
     }
 
-    assert(isPowerOf2_64(Alignment) && "Alignment should be a power of 2.");
-    if (Address.countr_zero() < Log2_64(Alignment)) {
+    if (Address.countr_zero() < Log2(Alignment)) {
       reportImmediateUB("Misaligned memory access.");
       return std::nullopt;
     }
@@ -287,7 +286,7 @@ class InstExecutor : public InstVisitor<InstExecutor, void> {
     return Offset.getZExtValue();
   }
 
-  AnyValue load(const AnyValue &Ptr, uint64_t Align, Type *ValTy) {
+  AnyValue load(const AnyValue &Ptr, Align Alignment, Type *ValTy) {
     if (Ptr.isPoison()) {
       reportImmediateUB("Invalid memory access with a poison pointer.");
       return AnyValue::getPoisonValue(Ctx, ValTy);
@@ -300,9 +299,10 @@ class InstExecutor : public InstVisitor<InstExecutor, void> {
       return AnyValue::getPoisonValue(Ctx, ValTy);
     }
     // TODO: pointer capability check
-    if (auto Offset = verifyMemAccess(
-            *MO, PtrVal.address(), Ctx.getEffectiveTypeStoreSize(ValTy), Align,
-            /*IsStore=*/false)) {
+    if (auto Offset =
+            verifyMemAccess(*MO, PtrVal.address(),
+                            Ctx.getEffectiveTypeStoreSize(ValTy), Alignment,
+                            /*IsStore=*/false)) {
       // Load from a dead stack object yields poison value.
       if (MO->getState() == MemoryObjectState::Dead)
         return AnyValue::getPoisonValue(Ctx, ValTy);
@@ -312,7 +312,7 @@ class InstExecutor : public InstVisitor<InstExecutor, void> {
     return AnyValue::getPoisonValue(Ctx, ValTy);
   }
 
-  void store(const AnyValue &Ptr, uint64_t Align, const AnyValue &Val,
+  void store(const AnyValue &Ptr, Align Alignment, const AnyValue &Val,
              Type *ValTy) {
     if (Ptr.isPoison()) {
       reportImmediateUB("Invalid memory access with a poison pointer.");
@@ -326,9 +326,10 @@ class InstExecutor : public InstVisitor<InstExecutor, void> {
       return;
     }
     // TODO: pointer capability check
-    if (auto Offset = verifyMemAccess(
-            *MO, PtrVal.address(), Ctx.getEffectiveTypeStoreSize(ValTy), Align,
-            /*IsStore=*/true))
+    if (auto Offset =
+            verifyMemAccess(*MO, PtrVal.address(),
+                            Ctx.getEffectiveTypeStoreSize(ValTy), Alignment,
+                            /*IsStore=*/true))
       Ctx.store(*MO, *Offset, Val, ValTy);
   }
 
@@ -1001,10 +1002,7 @@ class InstExecutor : public InstVisitor<InstExecutor, void> {
       // TODO: Should be documented in LangRef: GEPs with nowrap flags should
       // return poison when the type size exceeds index space.
       TypeSize Offset = GTI.getSequentialElementStride(DL);
-      APInt Scale(IndexBitWidth,
-                  Offset.isScalable()
-                      ? Offset.getKnownMinValue() * Ctx.getVScale()
-                      : Offset.getFixedValue(),
+      APInt Scale(IndexBitWidth, Ctx.getEffectiveTypeSize(Offset),
                   /*isSigned=*/false, /*implicitTrunc=*/true);
       if (!Scale.isZero())
         ApplyScaledOffset(getValue(V), Scale);
@@ -1025,8 +1023,8 @@ class InstExecutor : public InstVisitor<InstExecutor, void> {
   }
 
   void visitLoadInst(LoadInst &LI) {
-    auto RetVal = load(getValue(LI.getPointerOperand()), LI.getAlign().value(),
-                       LI.getType());
+    auto RetVal =
+        load(getValue(LI.getPointerOperand()), LI.getAlign(), LI.getType());
     // TODO: track volatile loads
     // TODO: handle metadata
     setResult(LI, std::move(RetVal));
@@ -1037,7 +1035,7 @@ class InstExecutor : public InstVisitor<InstExecutor, void> {
     auto &Val = getValue(SI.getValueOperand());
     // TODO: track volatile stores
     // TODO: handle metadata
-    store(Ptr, SI.getAlign().value(), Val, SI.getValueOperand()->getType());
+    store(Ptr, SI.getAlign(), Val, SI.getValueOperand()->getType());
     if (Status)
       Status &= Handler.onInstructionExecuted(SI, AnyValue());
   }

>From b63f1b1eacc35e6079eb20e27b09fd9de0ac8a62 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Thu, 5 Mar 2026 05:53:00 +0800
Subject: [PATCH 5/6] [llubi] Handle over-aligned arrays

---
 .../test/tools/llubi/loadstore_overaligned.ll | 30 +++++++++++++++++
 llvm/tools/llubi/lib/Context.cpp              | 33 ++++++++++++-------
 2 files changed, 51 insertions(+), 12 deletions(-)
 create mode 100644 llvm/test/tools/llubi/loadstore_overaligned.ll

diff --git a/llvm/test/tools/llubi/loadstore_overaligned.ll b/llvm/test/tools/llubi/loadstore_overaligned.ll
new file mode 100644
index 0000000000000..91a4eb63d9d2a
--- /dev/null
+++ b/llvm/test/tools/llubi/loadstore_overaligned.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_llubi_test_checks.py UTC_ARGS: --version 6
+; RUN: llubi --verbose < %s 2>&1 | FileCheck %s
+
+; For over-aligned arrays, the padding bytes between elements should be filled with undef.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:64:64-i64:64:64"
+
+define void @main() {
+  %alloca = alloca [2 x i32]
+  store <4 x i32> zeroinitializer, ptr %alloca, align 8
+  store [2 x i32] [i32 1, i32 1], ptr %alloca, align 8
+  ; The padding bytes in the middle are filled with undef,
+  ; while the tail padding bytes are still zero-initialized.
+  %load1 = load <4 x i32>, ptr %alloca, align 8
+  %load2 = load <4 x i32>, ptr %alloca, align 8
+  %load3 = load <4 x i32>, ptr %alloca, align 8
+  %load_arr = load [2 x i32], ptr %alloca, align 8
+  ret void
+}
+
+; CHECK: Entering function: main
+; CHECK-NEXT:   %alloca = alloca [2 x i32], align 8 => ptr 0x8 [alloca]
+; CHECK-NEXT:   store <4 x i32> zeroinitializer, ptr %alloca, align 8
+; CHECK-NEXT:   store [2 x i32] [i32 1, i32 1], ptr %alloca, align 8
+; CHECK-NEXT:   %load1 = load <4 x i32>, ptr %alloca, align 8 => { i32 1, i32 -1744110296, i32 1, i32 0 }
+; CHECK-NEXT:   %load2 = load <4 x i32>, ptr %alloca, align 8 => { i32 1, i32 1822494346, i32 1, i32 0 }
+; CHECK-NEXT:   %load3 = load <4 x i32>, ptr %alloca, align 8 => { i32 1, i32 -1486034729, i32 1, i32 0 }
+; CHECK-NEXT:   %load_arr = load [2 x i32], ptr %alloca, align 8 => { i32 1, i32 1 }
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: Exiting function: main
diff --git a/llvm/tools/llubi/lib/Context.cpp b/llvm/tools/llubi/lib/Context.cpp
index cfe96625b2c09..fe95eef57c2ab 100644
--- a/llvm/tools/llubi/lib/Context.cpp
+++ b/llvm/tools/llubi/lib/Context.cpp
@@ -205,12 +205,16 @@ AnyValue Context::fromBytes(ArrayRef<Byte> Bytes, Type *Ty,
   }
   if (auto *ArrTy = dyn_cast<ArrayType>(Ty)) {
     Type *ElemTy = ArrTy->getElementType();
+    uint32_t StrideInBits = getEffectiveTypeAllocSize(ElemTy) * 8;
     std::vector<AnyValue> ValVec;
     uint32_t NumElements = ArrTy->getNumElements();
     ValVec.reserve(NumElements);
-    for (uint32_t I = 0; I != NumElements; ++I)
+    uint32_t BaseOffsetInBits = OffsetInBits;
+    for (uint32_t I = 0; I != NumElements; ++I) {
+      OffsetInBits = BaseOffsetInBits + I * StrideInBits;
       ValVec.push_back(
           fromBytes(Bytes, ElemTy, OffsetInBits, /*CheckPaddingBits=*/true));
+    }
     return AnyValue(std::move(ValVec));
   }
   if (auto *StructTy = dyn_cast<StructType>(Ty)) {
@@ -321,25 +325,30 @@ void Context::toBytes(const AnyValue &Val, Type *Ty, uint32_t &OffsetInBits,
     OffsetInBits = NewOffsetInBits;
     return;
   }
+  auto FillUndefBytes = [&](uint32_t NewOffsetInBits) {
+    if (OffsetInBits == NewOffsetInBits)
+      return;
+    // Fill padding bits due to alignment requirement.
+    assert(NewOffsetInBits > OffsetInBits &&
+           "Unexpected negative padding bits!");
+    fill(Bytes.slice(OffsetInBits / 8, (NewOffsetInBits - OffsetInBits) / 8),
+         Byte::undef());
+    OffsetInBits = NewOffsetInBits;
+  };
   if (auto *ArrTy = dyn_cast<ArrayType>(Ty)) {
     Type *ElemTy = ArrTy->getElementType();
-    for (const auto &SubVal : Val.asAggregate())
+    uint32_t CurrentOffsetInBits = OffsetInBits;
+    uint32_t StrideInBits = getEffectiveTypeAllocSize(ElemTy) * 8;
+    for (const auto &SubVal : Val.asAggregate()) {
+      FillUndefBytes(CurrentOffsetInBits);
       toBytes(SubVal, ElemTy, OffsetInBits, Bytes, /*PaddingBits=*/true);
+      CurrentOffsetInBits += StrideInBits;
+    }
     return;
   }
   if (auto *StructTy = dyn_cast<StructType>(Ty)) {
     auto *Layout = DL.getStructLayout(StructTy);
     uint32_t BaseOffsetInBits = OffsetInBits;
-    auto FillUndefBytes = [&](uint32_t NewOffsetInBits) {
-      if (OffsetInBits == NewOffsetInBits)
-        return;
-      // Fill padding bits due to alignment requirement.
-      assert(NewOffsetInBits > OffsetInBits &&
-             "Unexpected negative padding bits!");
-      fill(Bytes.slice(OffsetInBits / 8, (NewOffsetInBits - OffsetInBits) / 8),
-           Byte::undef());
-      OffsetInBits = NewOffsetInBits;
-    };
     for (uint32_t I = 0, E = Val.asAggregate().size(); I != E; ++I) {
       Type *ElemTy = StructTy->getElementType(I);
       TypeSize ElemOffset = Layout->getElementOffset(I);

>From 3ebdd689ae06f24b93410c7cb81b3b6a38700823 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sat, 7 Mar 2026 01:55:31 +0800
Subject: [PATCH 6/6] [llubi] Address review comments.

---
 llvm/test/tools/llubi/loadstore_be.ll | 14 ++++++++++++--
 llvm/test/tools/llubi/loadstore_le.ll | 14 ++++++++++++--
 llvm/tools/llubi/lib/Interpreter.cpp  |  1 -
 3 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/llvm/test/tools/llubi/loadstore_be.ll b/llvm/test/tools/llubi/loadstore_be.ll
index 6e6d738b002ca..2e03939db7859 100644
--- a/llvm/test/tools/llubi/loadstore_be.ll
+++ b/llvm/test/tools/llubi/loadstore_be.ll
@@ -46,6 +46,12 @@ define void @main() {
   %val14 = load i32, ptr %alloc_lifetime
   store i32 77, ptr %alloc_lifetime
   %val15 = load i32, ptr %alloc_lifetime
+  ; Calling lifetime.start on a live object makes the contents uninitialized again.
+  call void @llvm.lifetime.start.p0(ptr %alloc_lifetime)
+  %val_undef1 = load i32, ptr %alloc_lifetime
+  %val_undef2 = load i32, ptr %alloc_lifetime
+  call void @llvm.lifetime.end.p0(ptr %alloc_lifetime)
+  ; Calling lifetime.end on an already dead object is a no-op.
   call void @llvm.lifetime.end.p0(ptr %alloc_lifetime)
+  ; Load of a dead object yields a poison value.
   %val16 = load i32, ptr %alloc_lifetime
@@ -134,6 +140,10 @@ define void @main() {
 ; CHECK-NEXT:   %val14 = load i32, ptr %alloc_lifetime, align 4 => i32 1822494346
 ; CHECK-NEXT:   store i32 77, ptr %alloc_lifetime, align 4
 ; CHECK-NEXT:   %val15 = load i32, ptr %alloc_lifetime, align 4 => i32 77
+; CHECK-NEXT:   call void @llvm.lifetime.start.p0(ptr %alloc_lifetime)
+; CHECK-NEXT:   %val_undef1 = load i32, ptr %alloc_lifetime, align 4 => i32 -1486034729
+; CHECK-NEXT:   %val_undef2 = load i32, ptr %alloc_lifetime, align 4 => i32 1900108014
+; CHECK-NEXT:   call void @llvm.lifetime.end.p0(ptr %alloc_lifetime)
 ; CHECK-NEXT:   call void @llvm.lifetime.end.p0(ptr %alloc_lifetime)
 ; CHECK-NEXT:   %val16 = load i32, ptr %alloc_lifetime, align 4 => poison
 ; CHECK-NEXT:   store i32 -524288, ptr %alloc, align 4
@@ -149,8 +159,8 @@ define void @main() {
 ; CHECK-NEXT:   %alloc_struct = alloca %struct, align 8 => ptr 0x30 [alloc_struct]
 ; CHECK-NEXT:   store %struct { [2 x i16] [i16 1, i16 2], i64 3 }, ptr %alloc_struct, align 8
 ; CHECK-NEXT:   %val19 = load %struct, ptr %alloc_struct, align 8 => { { i16 1, i16 2 }, i64 3 }
-; CHECK-NEXT:   %val20 = load i64, ptr %alloc_struct, align 8 => i64 281486375577815
-; CHECK-NEXT:   %val21 = load i64, ptr %alloc_struct, align 8 => i64 281485466753262
+; CHECK-NEXT:   %val20 = load i64, ptr %alloc_struct, align 8 => i64 281484800733898
+; CHECK-NEXT:   %val21 = load i64, ptr %alloc_struct, align 8 => i64 281484196877349
 ; CHECK-NEXT:   %alloc_struct_packed = alloca %struct.packed, align 8 => ptr 0x40 [alloc_struct_packed]
 ; CHECK-NEXT:   store %struct.packed <{ [2 x i16] [i16 1, i16 2], i64 3 }>, ptr %alloc_struct_packed, align 1
 ; CHECK-NEXT:   %val22 = load %struct.packed, ptr %alloc_struct_packed, align 1 => { { i16 1, i16 2 }, i64 3 }
diff --git a/llvm/test/tools/llubi/loadstore_le.ll b/llvm/test/tools/llubi/loadstore_le.ll
index c876337d3f2a7..e1170fb3854a2 100644
--- a/llvm/test/tools/llubi/loadstore_le.ll
+++ b/llvm/test/tools/llubi/loadstore_le.ll
@@ -47,6 +47,12 @@ define void @main() {
   %val14 = load i32, ptr %alloc_lifetime
   store i32 77, ptr %alloc_lifetime
   %val15 = load i32, ptr %alloc_lifetime
+  ; Calling lifetime.start on a live object makes the contents uninitialized again.
+  call void @llvm.lifetime.start.p0(ptr %alloc_lifetime)
+  %val_undef1 = load i32, ptr %alloc_lifetime
+  %val_undef2 = load i32, ptr %alloc_lifetime
+  call void @llvm.lifetime.end.p0(ptr %alloc_lifetime)
+  ; Calling lifetime.end on an already dead object is a no-op.
   call void @llvm.lifetime.end.p0(ptr %alloc_lifetime)
   ; Load of an dead object yields poison value.
   %val16 = load i32, ptr %alloc_lifetime
@@ -136,6 +142,10 @@ define void @main() {
 ; CHECK-NEXT:   %val14 = load i32, ptr %alloc_lifetime, align 4 => i32 1822494346
 ; CHECK-NEXT:   store i32 77, ptr %alloc_lifetime, align 4
 ; CHECK-NEXT:   %val15 = load i32, ptr %alloc_lifetime, align 4 => i32 77
+; CHECK-NEXT:   call void @llvm.lifetime.start.p0(ptr %alloc_lifetime)
+; CHECK-NEXT:   %val_undef1 = load i32, ptr %alloc_lifetime, align 4 => i32 -1486034729
+; CHECK-NEXT:   %val_undef2 = load i32, ptr %alloc_lifetime, align 4 => i32 1900108014
+; CHECK-NEXT:   call void @llvm.lifetime.end.p0(ptr %alloc_lifetime)
 ; CHECK-NEXT:   call void @llvm.lifetime.end.p0(ptr %alloc_lifetime)
 ; CHECK-NEXT:   %val16 = load i32, ptr %alloc_lifetime, align 4 => poison
 ; CHECK-NEXT:   store i32 -524288, ptr %alloc, align 4
@@ -151,8 +161,8 @@ define void @main() {
 ; CHECK-NEXT:   %alloc_struct = alloca %struct, align 8 => ptr 0x30 [alloc_struct]
 ; CHECK-NEXT:   store %struct { [2 x i16] [i16 1, i16 2], i64 3 }, ptr %alloc_struct, align 8
 ; CHECK-NEXT:   %val19 = load %struct, ptr %alloc_struct, align 8 => { { i16 1, i16 2 }, i64 3 }
-; CHECK-NEXT:   %val20 = load i64, ptr %alloc_struct, align 8 => i64 -6382470561775091711
-; CHECK-NEXT:   %val21 = load i64, ptr %alloc_struct, align 8 => i64 8160901778997641217
+; CHECK-NEXT:   %val20 = load i64, ptr %alloc_struct, align 8 => i64 5300370392114921473
+; CHECK-NEXT:   %val21 = load i64, ptr %alloc_struct, align 8 => i64 2706826262684499969
 ; CHECK-NEXT:   %alloc_struct_packed = alloca %struct.packed, align 8 => ptr 0x40 [alloc_struct_packed]
 ; CHECK-NEXT:   store %struct.packed <{ [2 x i16] [i16 1, i16 2], i64 3 }>, ptr %alloc_struct_packed, align 1
 ; CHECK-NEXT:   %val22 = load %struct.packed, ptr %alloc_struct_packed, align 1 => { { i16 1, i16 2 }, i64 3 }
diff --git a/llvm/tools/llubi/lib/Interpreter.cpp b/llvm/tools/llubi/lib/Interpreter.cpp
index 0e940fe502800..dd5530a355538 100644
--- a/llvm/tools/llubi/lib/Interpreter.cpp
+++ b/llvm/tools/llubi/lib/Interpreter.cpp
@@ -536,7 +536,6 @@ class InstExecutor : public InstVisitor<InstExecutor, void> {
         fill(MO->getBytes(), Byte::undef());
       } else {
         MO->setState(MemoryObjectState::Dead);
-        fill(MO->getBytes(), Byte::poison());
       }
       return AnyValue();
     }



More information about the llvm-commits mailing list