[llvm] [AggressiveInstCombine] Implement store merge optimization (PR #147540)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 8 07:44:15 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: Nikita Popov (nikic)
Changes:
Merge multiple small stores that were originally extracted from one value into a single wide store.
This is the store counterpart of the load merging that AggressiveInstCombine already performs.
The current implementation is an MVP; various generalizations are possible.
Fixes https://github.com/llvm/llvm-project/issues/147456.
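To illustrate the transformation, here is the smallest case covered by the included tests (test_i16 in store-merge.ll): two i8 stores of the low and high bytes of an i16 are merged into a single i16 store, with the alignment taken from the first part store.

```llvm
; Before: the i16 value is split into two single-byte stores.
define void @test_i16(i16 %x, ptr %p) {
  %x.0 = trunc i16 %x to i8
  store i8 %x.0, ptr %p
  %shr.1 = lshr i16 %x, 8
  %x.1 = trunc i16 %shr.1 to i8
  %gep.1 = getelementptr i8, ptr %p, i64 1
  store i8 %x.1, ptr %gep.1
  ret void
}

; After -passes=aggressive-instcombine (little endian):
define void @test_i16(i16 %x, ptr %p) {
  store i16 %x, ptr %p, align 1
  ret void
}
```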
Compile-time: https://llvm-compile-time-tracker.com/compare.php?from=dcc692a42f6aefc2377f1470b775a35c93c8c32c&to=74c8f57d4471700058ce2fe00a23c0ae5a483ff4&stat=instructions%3Au
---
Patch is 39.01 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/147540.diff
3 Files Affected:
- (modified) llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp (+138)
- (added) llvm/test/Transforms/AggressiveInstCombine/X86/store-merge-be.ll (+106)
- (added) llvm/test/Transforms/AggressiveInstCombine/X86/store-merge.ll (+790)
``````````diff
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 8c156c93ba8d1..259ad588dc8f5 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -842,6 +842,141 @@ static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
return true;
}
+/// ValWidth bits starting at ValOffset of Val stored at PtrBase+PtrOffset.
+struct PartStore {
+ Value *PtrBase;
+ APInt PtrOffset;
+ Value *Val;
+ uint64_t ValOffset;
+ uint64_t ValWidth;
+ StoreInst *Store;
+
+ bool isCompatibleWith(const PartStore &Other) const {
+ return PtrBase == Other.PtrBase && Val == Other.Val;
+ }
+
+ bool operator<(const PartStore &Other) const {
+ return PtrOffset.slt(Other.PtrOffset);
+ }
+};
+
+static std::optional<PartStore> matchPartStore(Instruction &I,
+ const DataLayout &DL) {
+ auto *Store = dyn_cast<StoreInst>(&I);
+ if (!Store || !Store->isSimple())
+ return std::nullopt;
+
+ Value *StoredVal = Store->getValueOperand();
+ Type *StoredTy = StoredVal->getType();
+ if (!StoredTy->isIntegerTy() || !DL.typeSizeEqualsStoreSize(StoredTy))
+ return std::nullopt;
+
+ uint64_t ValWidth = StoredTy->getPrimitiveSizeInBits();
+ uint64_t ValOffset = 0;
+ Value *Val;
+ if (!match(StoredVal, m_CombineOr(m_Trunc(m_LShr(m_Value(Val),
+ m_ConstantInt(ValOffset))),
+ m_Trunc(m_Value(Val)))))
+ return std::nullopt;
+
+ Value *Ptr = Store->getPointerOperand();
+ APInt PtrOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
+ Value *PtrBase = Ptr->stripAndAccumulateConstantOffsets(
+ DL, PtrOffset, /*AllowNonInbounds=*/true);
+ return {{PtrBase, PtrOffset, Val, ValOffset, ValWidth, Store}};
+}
+
+static bool mergePartStores(SmallVectorImpl<PartStore> &Parts,
+ const DataLayout &DL, TargetTransformInfo &TTI) {
+ if (Parts.size() < 2)
+ return false;
+
+ // We now have multiple parts of the same value stored to the same pointer.
+ // Sort the parts by pointer offset, and make sure they are consistent with
+ // the value offsets. Also check that the value is fully covered without
+ // overlaps.
+ // FIXME: We could support merging stores for only part of the value here.
+ llvm::sort(Parts);
+ int64_t LastEndOffsetFromFirst = 0;
+ const PartStore &First = Parts[0];
+ for (const PartStore &Part : Parts) {
+ APInt PtrOffsetFromFirst = Part.PtrOffset - First.PtrOffset;
+ int64_t ValOffsetFromFirst = Part.ValOffset - First.ValOffset;
+ if (PtrOffsetFromFirst * 8 != ValOffsetFromFirst ||
+ LastEndOffsetFromFirst != ValOffsetFromFirst)
+ return false;
+ LastEndOffsetFromFirst = ValOffsetFromFirst + Part.ValWidth;
+ }
+
+ // Check whether combining the stores is profitable.
+ // FIXME: We could generate smaller stores if we can't produce a large one.
+ LLVMContext &Ctx = First.Store->getContext();
+ Type *NewTy = Type::getIntNTy(Ctx, LastEndOffsetFromFirst);
+ unsigned Fast = 0;
+ if (!TTI.isTypeLegal(NewTy) ||
+ !TTI.allowsMisalignedMemoryAccesses(Ctx, LastEndOffsetFromFirst,
+ First.Store->getPointerAddressSpace(),
+ First.Store->getAlign(), &Fast) ||
+ !Fast)
+ return false;
+
+ // Generate the combined store.
+ IRBuilder<> Builder(First.Store);
+ Value *Val = First.Val;
+ if (First.ValOffset != 0)
+ Val = Builder.CreateLShr(Val, First.ValOffset);
+ Val = Builder.CreateTrunc(Val, NewTy);
+ Value *Ptr = First.PtrBase;
+ if (First.PtrOffset != 0)
+ Ptr = Builder.CreateInBoundsPtrAdd(Ptr, Builder.getInt(First.PtrOffset));
+ StoreInst *Store =
+ Builder.CreateAlignedStore(Val, Ptr, First.Store->getAlign());
+
+ AAMDNodes AATags = First.Store->getAAMetadata();
+ for (const PartStore &Part : drop_begin(Parts))
+ AATags = AATags.concat(Part.Store->getAAMetadata());
+ Store->setAAMetadata(AATags);
+
+ // Remove the old stores.
+ for (const PartStore &Part : Parts)
+ Part.Store->eraseFromParent();
+
+ return true;
+}
+
+static bool foldConsecutiveStores(BasicBlock &BB, const DataLayout &DL,
+ TargetTransformInfo &TTI, AliasAnalysis &AA) {
+ // FIXME: Add big endian support.
+ if (DL.isBigEndian())
+ return false;
+
+ SmallVector<PartStore, 8> Parts;
+ bool MadeChange = false;
+ for (Instruction &I : make_early_inc_range(BB)) {
+ if (std::optional<PartStore> Part = matchPartStore(I, DL)) {
+ if (Parts.empty() || Part->isCompatibleWith(Parts[0])) {
+ Parts.push_back(std::move(*Part));
+ continue;
+ }
+
+ MadeChange |= mergePartStores(Parts, DL, TTI);
+ Parts.clear();
+ Parts.push_back(std::move(*Part));
+ continue;
+ }
+
+ // FIXME: Use AA to make this more precise.
+ if (I.mayReadOrWriteMemory() || I.mayThrow()) {
+ MadeChange |= mergePartStores(Parts, DL, TTI);
+ Parts.clear();
+ continue;
+ }
+ }
+
+ MadeChange |= mergePartStores(Parts, DL, TTI);
+ return MadeChange;
+}
+
/// Combine away instructions providing they are still equivalent when compared
/// against 0. i.e do they have any bits set.
static Value *optimizeShiftInOrChain(Value *V, IRBuilder<> &Builder) {
@@ -1330,6 +1465,9 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
// bugs.
MadeChange |= foldLibCalls(I, TTI, TLI, AC, DT, DL, MadeCFGChange);
}
+
+ // Do this separately to avoid redundantly scanning stores multiple times.
+ MadeChange |= foldConsecutiveStores(BB, DL, TTI, AA);
}
// We're done with transforms, so remove dead instructions.
diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/store-merge-be.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/store-merge-be.ll
new file mode 100644
index 0000000000000..34f39245500b9
--- /dev/null
+++ b/llvm/test/Transforms/AggressiveInstCombine/X86/store-merge-be.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=aggressive-instcombine -mtriple=x86_64-unknown-linux-gnu -data-layout="E-n64" < %s | FileCheck %s
+
+; Pretend X86 is big endian.
+
+; FIXME: Big endian not supported yet.
+
+define void @test_i32_be(i32 %x, ptr %p) {
+; CHECK-LABEL: define void @test_i32_be(
+; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT: [[X_0:%.*]] = trunc i32 [[X]] to i8
+; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr i8, ptr [[P]], i64 3
+; CHECK-NEXT: store i8 [[X_0]], ptr [[GEP_0]], align 1
+; CHECK-NEXT: [[SHR_1:%.*]] = lshr i32 [[X]], 8
+; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[SHR_1]] to i8
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[P]], i64 2
+; CHECK-NEXT: store i8 [[X_1]], ptr [[GEP_1]], align 1
+; CHECK-NEXT: [[SHR_2:%.*]] = lshr i32 [[X]], 16
+; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[SHR_2]] to i8
+; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr i8, ptr [[P]], i64 1
+; CHECK-NEXT: store i8 [[X_2]], ptr [[GEP_2]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X]], 24
+; CHECK-NEXT: [[X_3:%.*]] = trunc i32 [[TMP1]] to i8
+; CHECK-NEXT: store i8 [[X_3]], ptr [[P]], align 1
+; CHECK-NEXT: ret void
+;
+ %x.0 = trunc i32 %x to i8
+ %gep.0 = getelementptr i8, ptr %p, i64 3
+ store i8 %x.0, ptr %gep.0
+ %shr.1 = lshr i32 %x, 8
+ %x.1 = trunc i32 %shr.1 to i8
+ %gep.1 = getelementptr i8, ptr %p, i64 2
+ store i8 %x.1, ptr %gep.1
+ %shr.2 = lshr i32 %x, 16
+ %x.2 = trunc i32 %shr.2 to i8
+ %gep.2 = getelementptr i8, ptr %p, i64 1
+ store i8 %x.2, ptr %gep.2
+ %shr.3 = lshr i32 %x, 24
+ %x.3 = trunc i32 %shr.3 to i8
+ store i8 %x.3, ptr %p
+ ret void
+}
+
+define void @test_i32_le(i32 %x, ptr %p) {
+; CHECK-LABEL: define void @test_i32_le(
+; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT: [[X_0:%.*]] = trunc i32 [[X]] to i8
+; CHECK-NEXT: store i8 [[X_0]], ptr [[P]], align 1
+; CHECK-NEXT: [[SHR_1:%.*]] = lshr i32 [[X]], 8
+; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[SHR_1]] to i8
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[P]], i64 1
+; CHECK-NEXT: store i8 [[X_1]], ptr [[GEP_1]], align 1
+; CHECK-NEXT: [[SHR_2:%.*]] = lshr i32 [[X]], 16
+; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[SHR_2]] to i8
+; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr i8, ptr [[P]], i64 2
+; CHECK-NEXT: store i8 [[X_2]], ptr [[GEP_2]], align 1
+; CHECK-NEXT: [[SHR_3:%.*]] = lshr i32 [[X]], 24
+; CHECK-NEXT: [[X_3:%.*]] = trunc i32 [[SHR_3]] to i8
+; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr i8, ptr [[P]], i64 3
+; CHECK-NEXT: store i8 [[X_3]], ptr [[GEP_3]], align 1
+; CHECK-NEXT: ret void
+;
+ %x.0 = trunc i32 %x to i8
+ store i8 %x.0, ptr %p
+ %shr.1 = lshr i32 %x, 8
+ %x.1 = trunc i32 %shr.1 to i8
+ %gep.1 = getelementptr i8, ptr %p, i64 1
+ store i8 %x.1, ptr %gep.1
+ %shr.2 = lshr i32 %x, 16
+ %x.2 = trunc i32 %shr.2 to i8
+ %gep.2 = getelementptr i8, ptr %p, i64 2
+ store i8 %x.2, ptr %gep.2
+ %shr.3 = lshr i32 %x, 24
+ %x.3 = trunc i32 %shr.3 to i8
+ %gep.3 = getelementptr i8, ptr %p, i64 3
+ store i8 %x.3, ptr %gep.3
+ ret void
+}
+
+define void @test_i32_mixed_parts(i32 %x, ptr %p) {
+; CHECK-LABEL: define void @test_i32_mixed_parts(
+; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT: [[X_0:%.*]] = trunc i32 [[X]] to i8
+; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr i8, ptr [[P]], i64 3
+; CHECK-NEXT: store i8 [[X_0]], ptr [[GEP_0]], align 1
+; CHECK-NEXT: [[SHR_1:%.*]] = lshr i32 [[X]], 8
+; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[SHR_1]] to i16
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[P]], i64 1
+; CHECK-NEXT: store i16 [[X_1]], ptr [[GEP_1]], align 2
+; CHECK-NEXT: [[SHR_3:%.*]] = lshr i32 [[X]], 24
+; CHECK-NEXT: [[X_3:%.*]] = trunc i32 [[SHR_3]] to i8
+; CHECK-NEXT: store i8 [[X_3]], ptr [[P]], align 1
+; CHECK-NEXT: ret void
+;
+ %x.0 = trunc i32 %x to i8
+ %gep.0 = getelementptr i8, ptr %p, i64 3
+ store i8 %x.0, ptr %gep.0
+ %shr.1 = lshr i32 %x, 8
+ %x.1 = trunc i32 %shr.1 to i16
+ %gep.1 = getelementptr i8, ptr %p, i64 1
+ store i16 %x.1, ptr %gep.1
+ %shr.3 = lshr i32 %x, 24
+ %x.3 = trunc i32 %shr.3 to i8
+ store i8 %x.3, ptr %p
+ ret void
+}
diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/store-merge.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/store-merge.ll
new file mode 100644
index 0000000000000..c0fbc6421c7ab
--- /dev/null
+++ b/llvm/test/Transforms/AggressiveInstCombine/X86/store-merge.ll
@@ -0,0 +1,790 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=aggressive-instcombine -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @use.i16(i16)
+declare void @use.i32(i32)
+
+define void @test_i16(i16 %x, ptr %p) {
+; CHECK-LABEL: define void @test_i16(
+; CHECK-SAME: i16 [[X:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT: store i16 [[X]], ptr [[P]], align 1
+; CHECK-NEXT: ret void
+;
+ %x.0 = trunc i16 %x to i8
+ store i8 %x.0, ptr %p
+ %shr.1 = lshr i16 %x, 8
+ %x.1 = trunc i16 %shr.1 to i8
+ %gep.1 = getelementptr i8, ptr %p, i64 1
+ store i8 %x.1, ptr %gep.1
+ ret void
+}
+
+define void @test_i32_i8_parts(i32 %x, ptr %p) {
+; CHECK-LABEL: define void @test_i32_i8_parts(
+; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT: store i32 [[X]], ptr [[P]], align 1
+; CHECK-NEXT: ret void
+;
+ %x.0 = trunc i32 %x to i8
+ store i8 %x.0, ptr %p
+ %shr.1 = lshr i32 %x, 8
+ %x.1 = trunc i32 %shr.1 to i8
+ %gep.1 = getelementptr i8, ptr %p, i64 1
+ store i8 %x.1, ptr %gep.1
+ %shr.2 = lshr i32 %x, 16
+ %x.2 = trunc i32 %shr.2 to i8
+ %gep.2 = getelementptr i8, ptr %p, i64 2
+ store i8 %x.2, ptr %gep.2
+ %shr.3 = lshr i32 %x, 24
+ %x.3 = trunc i32 %shr.3 to i8
+ %gep.3 = getelementptr i8, ptr %p, i64 3
+ store i8 %x.3, ptr %gep.3
+ ret void
+}
+
+define void @test_i32_i16_parts(i32 %x, ptr %p) {
+; CHECK-LABEL: define void @test_i32_i16_parts(
+; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT: store i32 [[X]], ptr [[P]], align 2
+; CHECK-NEXT: ret void
+;
+ %x.0 = trunc i32 %x to i16
+ store i16 %x.0, ptr %p
+ %shr.1 = lshr i32 %x, 16
+ %x.1 = trunc i32 %shr.1 to i16
+ %gep.1 = getelementptr i8, ptr %p, i64 2
+ store i16 %x.1, ptr %gep.1
+ ret void
+}
+
+define void @test_i32_mixed_parts(i32 %x, ptr %p) {
+; CHECK-LABEL: define void @test_i32_mixed_parts(
+; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT: store i32 [[X]], ptr [[P]], align 1
+; CHECK-NEXT: ret void
+;
+ %x.0 = trunc i32 %x to i8
+ store i8 %x.0, ptr %p
+ %shr.1 = lshr i32 %x, 8
+ %x.1 = trunc i32 %shr.1 to i16
+ %gep.1 = getelementptr i8, ptr %p, i64 1
+ store i16 %x.1, ptr %gep.1
+ %shr.3 = lshr i32 %x, 24
+ %x.3 = trunc i32 %shr.3 to i8
+ %gep.3 = getelementptr i8, ptr %p, i64 3
+ store i8 %x.3, ptr %gep.3
+ ret void
+}
+
+define void @test_i64(i64 %x, ptr %p) {
+; CHECK-LABEL: define void @test_i64(
+; CHECK-SAME: i64 [[X:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT: store i64 [[X]], ptr [[P]], align 1
+; CHECK-NEXT: ret void
+;
+ %x.0 = trunc i64 %x to i8
+ store i8 %x.0, ptr %p
+ %shr.1 = lshr i64 %x, 8
+ %x.1 = trunc i64 %shr.1 to i8
+ %gep.1 = getelementptr i8, ptr %p, i64 1
+ store i8 %x.1, ptr %gep.1
+ %shr.2 = lshr i64 %x, 16
+ %x.2 = trunc i64 %shr.2 to i8
+ %gep.2 = getelementptr i8, ptr %p, i64 2
+ store i8 %x.2, ptr %gep.2
+ %shr.3 = lshr i64 %x, 24
+ %x.3 = trunc i64 %shr.3 to i8
+ %gep.3 = getelementptr i8, ptr %p, i64 3
+ store i8 %x.3, ptr %gep.3
+ %shr.4 = lshr i64 %x, 32
+ %x.4 = trunc i64 %shr.4 to i8
+ %gep.4 = getelementptr i8, ptr %p, i64 4
+ store i8 %x.4, ptr %gep.4
+ %shr.5 = lshr i64 %x, 40
+ %x.5 = trunc i64 %shr.5 to i8
+ %gep.5 = getelementptr i8, ptr %p, i64 5
+ store i8 %x.5, ptr %gep.5
+ %shr.6 = lshr i64 %x, 48
+ %x.6 = trunc i64 %shr.6 to i8
+ %gep.6 = getelementptr i8, ptr %p, i64 6
+ store i8 %x.6, ptr %gep.6
+ %shr.7 = lshr i64 %x, 56
+ %x.7 = trunc i64 %shr.7 to i8
+ %gep.7 = getelementptr i8, ptr %p, i64 7
+ store i8 %x.7, ptr %gep.7
+ ret void
+}
+
+define void @test_i128(i128 %x, ptr %p) {
+; CHECK-LABEL: define void @test_i128(
+; CHECK-SAME: i128 [[X:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT: [[X_0:%.*]] = trunc i128 [[X]] to i8
+; CHECK-NEXT: store i8 [[X_0]], ptr [[P]], align 1
+; CHECK-NEXT: [[SHR_1:%.*]] = lshr i128 [[X]], 8
+; CHECK-NEXT: [[X_1:%.*]] = trunc i128 [[SHR_1]] to i8
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[P]], i64 1
+; CHECK-NEXT: store i8 [[X_1]], ptr [[GEP_1]], align 1
+; CHECK-NEXT: [[SHR_2:%.*]] = lshr i128 [[X]], 16
+; CHECK-NEXT: [[X_2:%.*]] = trunc i128 [[SHR_2]] to i8
+; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr i8, ptr [[P]], i64 2
+; CHECK-NEXT: store i8 [[X_2]], ptr [[GEP_2]], align 1
+; CHECK-NEXT: [[SHR_3:%.*]] = lshr i128 [[X]], 24
+; CHECK-NEXT: [[X_3:%.*]] = trunc i128 [[SHR_3]] to i8
+; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr i8, ptr [[P]], i64 3
+; CHECK-NEXT: store i8 [[X_3]], ptr [[GEP_3]], align 1
+; CHECK-NEXT: [[SHR_4:%.*]] = lshr i128 [[X]], 32
+; CHECK-NEXT: [[X_4:%.*]] = trunc i128 [[SHR_4]] to i8
+; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr i8, ptr [[P]], i64 4
+; CHECK-NEXT: store i8 [[X_4]], ptr [[GEP_4]], align 1
+; CHECK-NEXT: [[SHR_5:%.*]] = lshr i128 [[X]], 40
+; CHECK-NEXT: [[X_5:%.*]] = trunc i128 [[SHR_5]] to i8
+; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr i8, ptr [[P]], i64 5
+; CHECK-NEXT: store i8 [[X_5]], ptr [[GEP_5]], align 1
+; CHECK-NEXT: [[SHR_6:%.*]] = lshr i128 [[X]], 48
+; CHECK-NEXT: [[X_6:%.*]] = trunc i128 [[SHR_6]] to i8
+; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr i8, ptr [[P]], i64 6
+; CHECK-NEXT: store i8 [[X_6]], ptr [[GEP_6]], align 1
+; CHECK-NEXT: [[SHR_7:%.*]] = lshr i128 [[X]], 56
+; CHECK-NEXT: [[X_7:%.*]] = trunc i128 [[SHR_7]] to i8
+; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr i8, ptr [[P]], i64 7
+; CHECK-NEXT: store i8 [[X_7]], ptr [[GEP_7]], align 1
+; CHECK-NEXT: [[SHR_8:%.*]] = lshr i128 [[X]], 64
+; CHECK-NEXT: [[X_8:%.*]] = trunc i128 [[SHR_8]] to i8
+; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr i8, ptr [[P]], i64 8
+; CHECK-NEXT: store i8 [[X_8]], ptr [[GEP_8]], align 1
+; CHECK-NEXT: [[SHR_9:%.*]] = lshr i128 [[X]], 72
+; CHECK-NEXT: [[X_9:%.*]] = trunc i128 [[SHR_9]] to i8
+; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr i8, ptr [[P]], i64 9
+; CHECK-NEXT: store i8 [[X_9]], ptr [[GEP_9]], align 1
+; CHECK-NEXT: [[SHR_10:%.*]] = lshr i128 [[X]], 80
+; CHECK-NEXT: [[X_10:%.*]] = trunc i128 [[SHR_10]] to i8
+; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr i8, ptr [[P]], i64 10
+; CHECK-NEXT: store i8 [[X_10]], ptr [[GEP_10]], align 1
+; CHECK-NEXT: [[SHR_11:%.*]] = lshr i128 [[X]], 88
+; CHECK-NEXT: [[X_11:%.*]] = trunc i128 [[SHR_11]] to i8
+; CHECK-NEXT: [[GEP_11:%.*]] = getelementptr i8, ptr [[P]], i64 11
+; CHECK-NEXT: store i8 [[X_11]], ptr [[GEP_11]], align 1
+; CHECK-NEXT: [[SHR_12:%.*]] = lshr i128 [[X]], 96
+; CHECK-NEXT: [[X_12:%.*]] = trunc i128 [[SHR_12]] to i8
+; CHECK-NEXT: [[GEP_12:%.*]] = getelementptr i8, ptr [[P]], i64 12
+; CHECK-NEXT: store i8 [[X_12]], ptr [[GEP_12]], align 1
+; CHECK-NEXT: [[SHR_13:%.*]] = lshr i128 [[X]], 104
+; CHECK-NEXT: [[X_13:%.*]] = trunc i128 [[SHR_13]] to i8
+; CHECK-NEXT: [[GEP_13:%.*]] = getelementptr i8, ptr [[P]], i64 13
+; CHECK-NEXT: store i8 [[X_13]], ptr [[GEP_13]], align 1
+; CHECK-NEXT: [[SHR_14:%.*]] = lshr i128 [[X]], 112
+; CHECK-NEXT: [[X_14:%.*]] = trunc i128 [[SHR_14]] to i8
+; CHECK-NEXT: [[GEP_14:%.*]] = getelementptr i8, ptr [[P]], i64 14
+; CHECK-NEXT: store i8 [[X_14]], ptr [[GEP_14]], align 1
+; CHECK-NEXT: [[SHR_15:%.*]] = lshr i128 [[X]], 120
+; CHECK-NEXT: [[X_15:%.*]] = trunc i128 [[SHR_15]] to i8
+; CHECK-NEXT: [[GEP_15:%.*]] = getelementptr i8, ptr [[P]], i64 15
+; CHECK-NEXT: store i8 [[X_15]], ptr [[GEP_15]], align 1
+; CHECK-NEXT: ret void
+;
+ %x.0 = trunc i128 %x to i8
+ store i8 %x.0, ptr %p
+ %shr.1 = lshr i128 %x, 8
+ %x.1 = trunc i128 %shr.1 to i8
+ %gep.1 = getelementptr i8, ptr %p, i64 1
+ store i8 %x.1, ptr %gep.1
+ %shr.2 = lshr i128 %x, 16
+ %x.2 = trunc i128 %shr.2 to i8
+ %gep.2 = getelementptr i8, ptr %p, i64 2
+ store i8 %x.2, ptr %gep.2
+ %shr.3 = lshr i128 %x, 24
+ %x.3 = trunc i128 %shr.3 to i8
+ %gep.3 = getelementptr i8, ptr %p, i64 3
+ store i8 %x.3, ptr %gep.3
+ %shr.4 = lshr i128 %x, 32
+ %x.4 = trunc i128 %shr.4 to i8
+ %gep.4 = getelementptr i8, ptr %p, i64 4
+ store i8 %x.4, ptr %gep.4
+ %shr.5 = lshr i128 %x, 40
+ %x.5 = trunc i128 %shr.5 to i8
+ %gep.5 = getelementptr i8, ptr %p, i64 5
+ store i8 %x.5, ptr %gep.5
+ %shr.6 = lshr i128 %x, 48
+ %x.6 = trunc i128 %shr.6 to i8
+ %gep.6 = getelementptr i8, ptr %p, i64 6
+ store i8 %x.6, ptr %gep.6
+ %shr.7 = lshr i128 %x, 56
+ %x.7 = trunc i128 %shr.7 to i8
+ %gep.7 = getelementptr i8, ptr %p, i64 7
+ store i8 %x.7, ptr %gep.7
+ %shr.8 = lshr i128 %x, 64
+ %x.8 = trunc i128 %shr.8 to i8
+ %gep.8 = getelementptr i8, ptr %p, i64 8
+ store i8 %x.8, ptr %gep.8
+ %shr.9 = lshr i128 %x, 72
+ %x.9 = trunc i128 %shr.9 to i8
+ %gep.9 = getelementptr i8, ptr %p, i64 9
+ store i8 %x.9, ptr %gep.9
+ %shr.10 = lshr i128 %x, 80
+ %x.10 = trunc i128 %shr.10 to i8
+ %gep.10 = getelementptr i8, ptr %p, i64 10
+ store i8 %x.10, ptr %gep.10
+ %shr.11 = lshr i128 %x, 88
+ %x.11 = trunc i128 %shr.11 to i8
+ %gep.11 = getelementptr i8, ptr %p, i64 11
+ store i8 %x.11, ptr %gep.11
+ %shr.12 = lshr i128 %x, 96
+ %x.12 = trunc i128 %shr.12 to i8
+ %gep.12 = getelementptr i8, ptr %p, i64 12
+ store i8 %x.12, ptr %gep.12
+ %shr.13 = lshr i128 %x, 104
+ %x.13 = trunc i128 %shr.13 to i8
+ %gep.13 = getelementptr i8, ptr %p, i64 13
+ store i8 %x.13, ptr %gep.13
+ %shr.14 = lshr i128 %x, 112
+ %x.14 = trunc i128 %shr.14 to i8
+ %gep.14 = getelementptr i8, ptr %p, i64 14
+ store i8 %x.14, ptr %gep.14
+ %shr.15 = lshr i128 %x, 120
+ %x.15 = trunc i128 %shr.15 to i8
+ %gep.15 = getelemen...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/147540