[llvm] [InstCombine] Convert load from LUT into a select (PR #98339)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 10 15:48:16 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Yingwei Zheng (dtcxzyw)
<details>
<summary>Changes</summary>
Alive2: https://alive2.llvm.org/ce/z/ESddB9
This patch converts a load from a small constant lookup table (LUT) into a select. This reduces binary size and unblocks further optimizations.
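As a minimal sketch (mirroring the `@fold_arr2` test added below; the `@before`/`@after` function names are purely illustrative), a load from a two-element constant LUT becomes a compare plus select, which follow-up InstCombine folds then simplify to a zext of the compare:

```llvm
@arr2 = constant [2 x i32] [i32 0, i32 1], align 4

; Before: indexed load from the constant lookup table.
define i32 @before(i64 %x) {
  %arrayidx = getelementptr [2 x i32], ptr @arr2, i64 0, i64 %x
  %val = load i32, ptr %arrayidx, align 4
  ret i32 %val
}

; After: the fold emits a select on (icmp eq %x, C); for this table,
; InstCombine then simplifies that select into a zext of the compare.
define i32 @after(i64 %x) {
  %cmp = icmp ne i64 %x, 0
  %val = zext i1 %cmp to i32
  ret i32 %val
}
```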
---
Full diff: https://github.com/llvm/llvm-project/pull/98339.diff
3 Files Affected:
- (modified) llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp (+103)
- (modified) llvm/test/Transforms/InstCombine/load-cmp.ll (+2-6)
- (added) llvm/test/Transforms/InstCombine/load-global.ll (+219)
``````````diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 21d5e1dece024..e5a0ac4e55370 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -998,6 +998,105 @@ static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
return false;
}
+static Value *foldLoadFromIndexedGlobal(LoadInst &LI, IRBuilderBase &Builder) {
+ if (LI.isVolatile())
+ return nullptr;
+
+ auto *GEP = dyn_cast<GetElementPtrInst>(LI.getPointerOperand());
+ if (!GEP)
+ return nullptr;
+
+ auto *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+ return nullptr;
+
+ Constant *Init = GV->getInitializer();
+ if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
+ return nullptr;
+
+ Type *EltTy = Init->getType()->getArrayElementType();
+ if (EltTy != LI.getType())
+ return nullptr;
+
+ uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
+ // Don't blow up on huge arrays.
+ // This threshold is chosen based on statistics collected from
+ // real-world applications.
+ constexpr uint64_t MaxArraySize = 16;
+ if (ArrayElementCount > MaxArraySize)
+ return nullptr;
+
+ auto &DL = LI.getDataLayout();
+ uint64_t IndexBW = DL.getIndexTypeSizeInBits(GEP->getType());
+ APInt ConstOffset(IndexBW, 0);
+ MapVector<Value *, APInt> VariableOffsets;
+ if (!GEP->collectOffset(DL, IndexBW, VariableOffsets, ConstOffset))
+ return nullptr;
+
+ if (!ConstOffset.isZero() || VariableOffsets.size() != 1 ||
+ VariableOffsets.front().second !=
+ DL.getTypeAllocSize(EltTy).getFixedValue())
+ return nullptr;
+
+ Value *Index = VariableOffsets.front().first;
+ if (Index->getType()->getScalarSizeInBits() != IndexBW)
+ return nullptr;
+
+ SmallMapVector<Constant *, uint64_t, 2> ValueMap;
+ // MultiMapIdx indicates that this value occurs more than once in the array.
+ constexpr uint64_t MultiMapIdx = static_cast<uint64_t>(-1);
+ uint32_t MultiMapElts = 0;
+ for (uint64_t I = 0; I < ArrayElementCount; ++I) {
+ Constant *Elt = Init->getAggregateElement(I);
+
+ // Bail out if the array contains undef values.
+ if (isa<UndefValue>(Elt))
+ return nullptr;
+
+ if (auto *It = ValueMap.find(Elt); It != ValueMap.end()) {
+ if (It->second == MultiMapIdx)
+ continue;
+ if (++MultiMapElts == 2)
+ return nullptr;
+ It->second = MultiMapIdx;
+ } else {
+ if (ValueMap.size() == 2)
+ return nullptr;
+ ValueMap.insert(std::make_pair(Elt, I));
+ }
+ }
+
+ // Handle loads from uniform arrays (all elements are identical).
+ if (ValueMap.size() == 1)
+ return ValueMap.begin()->first;
+
+ // Now there are exactly two distinct values in the array, and at least
+ // one of them occurs at exactly one index.
+ assert(ValueMap.size() == 2);
+
+ auto [C1, I1] = *ValueMap.begin();
+ auto [C2, I2] = *ValueMap.rbegin();
+ assert((I1 != MultiMapIdx || I2 != MultiMapIdx) &&
+ "Should have a one to one mapping");
+ Value *TrueArm;
+ Value *FalseArm;
+ uint64_t C;
+ if (I1 != MultiMapIdx) {
+ TrueArm = C1;
+ FalseArm = C2;
+ C = I1;
+ } else {
+ TrueArm = C2;
+ FalseArm = C1;
+ C = I2;
+ }
+
+ return Builder.CreateSelect(
+ Builder.CreateICmp(ICmpInst::ICMP_EQ, Index,
+ ConstantInt::get(Index->getType(), C)),
+ TrueArm, FalseArm);
+}
+
Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
Value *Op = LI.getOperand(0);
if (Value *Res = simplifyLoadInst(&LI, Op, SQ.getWithInstruction(&LI)))
@@ -1048,6 +1147,10 @@ Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
return replaceInstUsesWith(LI, PoisonValue::get(LI.getType()));
}
+ // Convert a load from a constant lookup table into a select.
+ if (auto *V = foldLoadFromIndexedGlobal(LI, Builder))
+ return replaceInstUsesWith(LI, V);
+
if (Op->hasOneUse()) {
// Change select and PHI nodes to select values instead of addresses: this
// helps alias analysis out a lot, allows many others simplifications, and
diff --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll
index b956de29e0b8d..1624fa8483a3f 100644
--- a/llvm/test/Transforms/InstCombine/load-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/load-cmp.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes=instcombine -S -data-layout="p:32:32:32-p1:16:16:16-p2:128:128:128:32-n8:16:32:64" < %s | FileCheck %s
+; RUN: opt -passes="instcombine<max-iterations=2>" -S -data-layout="p:32:32:32-p1:16:16:16-p2:128:128:128:32-n8:16:32:64" < %s | FileCheck %s
@G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85,
i16 73, i16 82, i16 69, i16 68, i16 0]
@@ -340,11 +340,7 @@ define i1 @test10_struct_arr_noinbounds_i64(i64 %x) {
define i1 @pr93017(i64 %idx) {
; CHECK-LABEL: @pr93017(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[IDX:%.*]] to i32
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [2 x ptr], ptr @table, i32 0, i32 [[TMP1]]
-; CHECK-NEXT: [[V:%.*]] = load ptr, ptr [[GEP]], align 4
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[V]], null
-; CHECK-NEXT: ret i1 [[CMP]]
+; CHECK-NEXT: ret i1 true
;
%gep = getelementptr inbounds [2 x ptr], ptr @table, i64 0, i64 %idx
%v = load ptr, ptr %gep
diff --git a/llvm/test/Transforms/InstCombine/load-global.ll b/llvm/test/Transforms/InstCombine/load-global.ll
new file mode 100644
index 0000000000000..95ce560f0ded9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/load-global.ll
@@ -0,0 +1,219 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+@arr2 = constant [2 x i32] [i32 0, i32 1], align 4
+@arr2_mutable = global [4 x i32] [i32 0, i32 0, i32 1, i32 1], align 4
+@arr2_external = external constant [4 x i32], align 4
+@arr2_uniform = constant [2 x i32] [i32 1, i32 1], align 4
+@arr2_undef = constant [2 x i32] [i32 1, i32 undef], align 4
+@arr3 = constant [3 x i32] [i32 0, i32 1, i32 1], align 4
+@arr3_alt = constant [3 x i32] [i32 1, i32 0, i32 1], align 4
+@arr3_uniform = constant [3 x i32] [i32 1, i32 1, i32 1], align 4
+@arr3_var = constant [3 x i32] [i32 0, i32 3, i32 4], align 4
+@arr4_multimap = constant [4 x i32] [i32 0, i32 0, i32 1, i32 1], align 4
+
+define i32 @fold_arr2(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr2(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ne i64 [[X]], 0
+; CHECK-NEXT: [[VAL:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [2 x i32], ptr @arr2, i64 0, i64 %x
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+define i32 @fold_arr2_uniform(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr2_uniform(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i32 1
+;
+entry:
+ %arrayidx = getelementptr [2 x i32], ptr @arr2_uniform, i64 0, i64 %x
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+define i32 @fold_arr3(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr3(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ne i64 [[X]], 0
+; CHECK-NEXT: [[VAL:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [3 x i32], ptr @arr3, i64 0, i64 %x
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+define i32 @fold_arr3_alt(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr3_alt(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ne i64 [[X]], 1
+; CHECK-NEXT: [[VAL:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [3 x i32], ptr @arr3_alt, i64 0, i64 %x
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+define i32 @fold_arr3_uniform(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr3_uniform(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i32 1
+;
+entry:
+ %arrayidx = getelementptr [3 x i32], ptr @arr3_uniform, i64 0, i64 %x
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+; negative tests
+
+define i32 @fold_arr2_mutable(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr2_mutable(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [2 x i32], ptr @arr2_mutable, i64 0, i64 [[X]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [2 x i32], ptr @arr2_mutable, i64 0, i64 %x
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+define i32 @fold_arr2_external(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr2_external(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [2 x i32], ptr @arr2_external, i64 0, i64 [[X]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [2 x i32], ptr @arr2_external, i64 0, i64 %x
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+define i32 @fold_arr2_volatile(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr2_volatile(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [2 x i32], ptr @arr2, i64 0, i64 [[X]]
+; CHECK-NEXT: [[VAL:%.*]] = load volatile i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [2 x i32], ptr @arr2, i64 0, i64 %x
+ %val = load volatile i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+define i32 @fold_arr2_mismatch_type1(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr2_mismatch_type1(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [2 x i8], ptr @arr2, i64 0, i64 [[X]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [2 x i8], ptr @arr2, i64 0, i64 %x
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+define i8 @fold_arr2_mismatch_type2(i64 %x) {
+; CHECK-LABEL: define i8 @fold_arr2_mismatch_type2(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [2 x i32], ptr @arr2, i64 0, i64 [[X]]
+; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret i8 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [2 x i32], ptr @arr2, i64 0, i64 %x
+ %val = load i8, ptr %arrayidx, align 4
+ ret i8 %val
+}
+
+define i32 @fold_arr2_bad_gep1(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr2_bad_gep1(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [2 x i32], ptr @arr2, i64 1, i64 [[X]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [2 x i32], ptr @arr2, i64 1, i64 %x
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+define i32 @fold_arr2_bad_gep2(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr2_bad_gep2(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %arrayidx = getelementptr [2 x i32], ptr @arr2, i64 0
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+define i32 @fold_arr3_var(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr3_var(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [3 x i32], ptr @arr3_var, i64 0, i64 [[X]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [3 x i32], ptr @arr3_var, i64 0, i64 %x
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+define i32 @fold_arr4_multimap(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr4_multimap(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [4 x i32], ptr @arr4_multimap, i64 0, i64 [[X]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [4 x i32], ptr @arr4_multimap, i64 0, i64 %x
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+define i32 @fold_arr2_undef(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr2_undef(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [2 x i32], ptr @arr2_undef, i64 0, i64 [[X]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [2 x i32], ptr @arr2_undef, i64 0, i64 %x
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/98339