[llvm] [InstCombine] Convert load from LUT into a select (PR #98339)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 10 23:18:44 PDT 2024
https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/98339
>From 20af1b84c79ac23c2c988b1457ac64ada9f920f1 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Wed, 10 Jul 2024 23:37:25 +0800
Subject: [PATCH 1/6] [InstCombine] Add pre-commit tests. NFC.
---
.../Transforms/InstCombine/load-global.ll | 208 ++++++++++++++++++
1 file changed, 208 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/load-global.ll
diff --git a/llvm/test/Transforms/InstCombine/load-global.ll b/llvm/test/Transforms/InstCombine/load-global.ll
new file mode 100644
index 0000000000000..fe410b3b21cf7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/load-global.ll
@@ -0,0 +1,208 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+ at arr2 = constant [2 x i32] [i32 0, i32 1], align 4
+ at arr2_mutable = global [4 x i32] [i32 0, i32 0, i32 1, i32 1], align 4
+ at arr2_external = external constant [4 x i32], align 4
+ at arr2_uniform = constant [2 x i32] [i32 1, i32 1], align 4
+ at arr3 = constant [3 x i32] [i32 0, i32 1, i32 1], align 4
+ at arr3_alt = constant [3 x i32] [i32 1, i32 0, i32 1], align 4
+ at arr3_uniform = constant [3 x i32] [i32 1, i32 1, i32 1], align 4
+ at arr3_var = constant [3 x i32] [i32 0, i32 3, i32 4], align 4
+ at arr4_multimap = constant [4 x i32] [i32 0, i32 0, i32 1, i32 1], align 4
+
+define i32 @fold_arr2(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr2(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [2 x i32], ptr @arr2, i64 0, i64 [[X]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [2 x i32], ptr @arr2, i64 0, i64 %x
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+define i32 @fold_arr2_uniform(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr2_uniform(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [2 x i32], ptr @arr2_uniform, i64 0, i64 [[X]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [2 x i32], ptr @arr2_uniform, i64 0, i64 %x
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+define i32 @fold_arr3(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr3(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [3 x i32], ptr @arr3, i64 0, i64 [[X]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [3 x i32], ptr @arr3, i64 0, i64 %x
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+define i32 @fold_arr3_alt(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr3_alt(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [3 x i32], ptr @arr3_alt, i64 0, i64 [[X]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [3 x i32], ptr @arr3_alt, i64 0, i64 %x
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+define i32 @fold_arr3_uniform(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr3_uniform(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [3 x i32], ptr @arr3_uniform, i64 0, i64 [[X]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [3 x i32], ptr @arr3_uniform, i64 0, i64 %x
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+; negative tests
+
+define i32 @fold_arr2_mutable(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr2_mutable(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [2 x i32], ptr @arr2_mutable, i64 0, i64 [[X]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [2 x i32], ptr @arr2_mutable, i64 0, i64 %x
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+define i32 @fold_arr2_external(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr2_external(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [2 x i32], ptr @arr2_external, i64 0, i64 [[X]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [2 x i32], ptr @arr2_external, i64 0, i64 %x
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+define i32 @fold_arr2_volatile(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr2_volatile(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [2 x i32], ptr @arr2, i64 0, i64 [[X]]
+; CHECK-NEXT: [[VAL:%.*]] = load volatile i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [2 x i32], ptr @arr2, i64 0, i64 %x
+ %val = load volatile i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+define i32 @fold_arr2_mismatch_type1(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr2_mismatch_type1(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [2 x i8], ptr @arr2, i64 0, i64 [[X]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [2 x i8], ptr @arr2, i64 0, i64 %x
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+define i8 @fold_arr2_mismatch_type2(i64 %x) {
+; CHECK-LABEL: define i8 @fold_arr2_mismatch_type2(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [2 x i32], ptr @arr2, i64 0, i64 [[X]]
+; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret i8 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [2 x i32], ptr @arr2, i64 0, i64 %x
+ %val = load i8, ptr %arrayidx, align 4
+ ret i8 %val
+}
+
+define i32 @fold_arr2_bad_gep1(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr2_bad_gep1(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [2 x i32], ptr @arr2, i64 1, i64 [[X]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [2 x i32], ptr @arr2, i64 1, i64 %x
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+define i32 @fold_arr2_bad_gep2(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr2_bad_gep2(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %arrayidx = getelementptr [2 x i32], ptr @arr2, i64 0
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+define i32 @fold_arr3_var(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr3_var(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [3 x i32], ptr @arr3_var, i64 0, i64 [[X]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [3 x i32], ptr @arr3_var, i64 0, i64 %x
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+define i32 @fold_arr4_multimap(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr4_multimap(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [4 x i32], ptr @arr4_multimap, i64 0, i64 [[X]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [4 x i32], ptr @arr4_multimap, i64 0, i64 %x
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
>From 116f08f2586d343d85b3415b6ad04166d1b8c5f6 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Wed, 10 Jul 2024 23:38:24 +0800
Subject: [PATCH 2/6] [InstCombine] Convert load from LUT into a select
---
.../InstCombineLoadStoreAlloca.cpp | 87 +++++++++++++++++++
llvm/test/Transforms/InstCombine/load-cmp.ll | 8 +-
.../Transforms/InstCombine/load-global.ll | 20 ++---
3 files changed, 97 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 21d5e1dece024..2b5bd64cbe2e5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -998,6 +998,89 @@ static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
return false;
}
+static Value *foldLoadFromIndexedGlobal(LoadInst &LI, IRBuilderBase &Builder) {
+ if (LI.isVolatile())
+ return nullptr;
+
+ auto *GEP = dyn_cast<GetElementPtrInst>(LI.getPointerOperand());
+ if (!GEP || LI.getType() != GEP->getResultElementType())
+ return nullptr;
+
+ auto *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ GV->getValueType() != GEP->getSourceElementType())
+ return nullptr;
+
+ Constant *Init = GV->getInitializer();
+ if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
+ return nullptr;
+
+ uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
+ // Don't blow up on huge arrays.
+ // This threshold is chosen based on statistics on a dataset
+ // which is collected from real-world applications.
+ constexpr uint64_t MaxArraySize = 16;
+ if (ArrayElementCount > MaxArraySize)
+ return nullptr;
+
+ // Require: GEP GV, 0, i
+ if (GEP->getNumOperands() != 3 || !isa<ConstantInt>(GEP->getOperand(1)) ||
+ !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
+ isa<Constant>(GEP->getOperand(2)))
+ return nullptr;
+
+ SmallMapVector<Constant *, uint64_t, 2> ValueMap;
+ // MultiMapIdx indicates that this value occurs more than once in the array.
+ constexpr uint64_t MultiMapIdx = static_cast<uint64_t>(-1);
+ uint32_t MultiMapElts = 0;
+ for (uint64_t I = 0; I < ArrayElementCount; ++I) {
+ Constant *Elt = Init->getAggregateElement(I);
+
+ if (auto *It = ValueMap.find(Elt); It != ValueMap.end()) {
+ if (It->second == MultiMapIdx)
+ continue;
+ if (++MultiMapElts == 2)
+ return nullptr;
+ It->second = MultiMapIdx;
+ } else {
+ if (ValueMap.size() == 2)
+ return nullptr;
+ ValueMap.insert(std::make_pair(Elt, I));
+ }
+ }
+
+ // Handle load from uniform arrays.
+ if (ValueMap.size() == 1)
+ return ValueMap.begin()->first;
+
+ // The array now holds exactly two distinct values, and at least one of
+ // them occurs at exactly one index, so one equality test on Index suffices.
+ assert(ValueMap.size() == 2);
+
+ auto [C1, I1] = *ValueMap.begin();
+ auto [C2, I2] = *ValueMap.rbegin();
+ assert((I1 != MultiMapIdx || I2 != MultiMapIdx) &&
+ "Should have a one to one mapping");
+ Value *TrueArm;
+ Value *FalseArm;
+ uint64_t C;
+ if (I1 != MultiMapIdx) {
+ TrueArm = C1;
+ FalseArm = C2;
+ C = I1;
+ } else {
+ TrueArm = C2;
+ FalseArm = C1;
+ C = I2;
+ }
+
+ Value *Index = GEP->getOperand(2);
+ return Builder.CreateSelect(
+ Builder.CreateICmp(ICmpInst::ICMP_EQ, Index,
+ ConstantInt::get(Index->getType(), C)),
+ TrueArm, FalseArm);
+}
+
Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
Value *Op = LI.getOperand(0);
if (Value *Res = simplifyLoadInst(&LI, Op, SQ.getWithInstruction(&LI)))
@@ -1048,6 +1131,10 @@ Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
return replaceInstUsesWith(LI, PoisonValue::get(LI.getType()));
}
+ // Convert load from a constant lookup table into select
+ if (auto *V = foldLoadFromIndexedGlobal(LI, Builder))
+ return replaceInstUsesWith(LI, V);
+
if (Op->hasOneUse()) {
// Change select and PHI nodes to select values instead of addresses: this
// helps alias analysis out a lot, allows many others simplifications, and
diff --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll
index b956de29e0b8d..1624fa8483a3f 100644
--- a/llvm/test/Transforms/InstCombine/load-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/load-cmp.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes=instcombine -S -data-layout="p:32:32:32-p1:16:16:16-p2:128:128:128:32-n8:16:32:64" < %s | FileCheck %s
+; RUN: opt -passes="instcombine<max-iterations=2>" -S -data-layout="p:32:32:32-p1:16:16:16-p2:128:128:128:32-n8:16:32:64" < %s | FileCheck %s
@G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85,
i16 73, i16 82, i16 69, i16 68, i16 0]
@@ -340,11 +340,7 @@ define i1 @test10_struct_arr_noinbounds_i64(i64 %x) {
define i1 @pr93017(i64 %idx) {
; CHECK-LABEL: @pr93017(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[IDX:%.*]] to i32
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [2 x ptr], ptr @table, i32 0, i32 [[TMP1]]
-; CHECK-NEXT: [[V:%.*]] = load ptr, ptr [[GEP]], align 4
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[V]], null
-; CHECK-NEXT: ret i1 [[CMP]]
+; CHECK-NEXT: ret i1 true
;
%gep = getelementptr inbounds [2 x ptr], ptr @table, i64 0, i64 %idx
%v = load ptr, ptr %gep
diff --git a/llvm/test/Transforms/InstCombine/load-global.ll b/llvm/test/Transforms/InstCombine/load-global.ll
index fe410b3b21cf7..b8b4b05365c79 100644
--- a/llvm/test/Transforms/InstCombine/load-global.ll
+++ b/llvm/test/Transforms/InstCombine/load-global.ll
@@ -15,8 +15,8 @@ define i32 @fold_arr2(i64 %x) {
; CHECK-LABEL: define i32 @fold_arr2(
; CHECK-SAME: i64 [[X:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [2 x i32], ptr @arr2, i64 0, i64 [[X]]
-; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ne i64 [[X]], 0
+; CHECK-NEXT: [[VAL:%.*]] = zext i1 [[TMP0]] to i32
; CHECK-NEXT: ret i32 [[VAL]]
;
entry:
@@ -29,9 +29,7 @@ define i32 @fold_arr2_uniform(i64 %x) {
; CHECK-LABEL: define i32 @fold_arr2_uniform(
; CHECK-SAME: i64 [[X:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [2 x i32], ptr @arr2_uniform, i64 0, i64 [[X]]
-; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT: ret i32 [[VAL]]
+; CHECK-NEXT: ret i32 1
;
entry:
%arrayidx = getelementptr [2 x i32], ptr @arr2_uniform, i64 0, i64 %x
@@ -43,8 +41,8 @@ define i32 @fold_arr3(i64 %x) {
; CHECK-LABEL: define i32 @fold_arr3(
; CHECK-SAME: i64 [[X:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [3 x i32], ptr @arr3, i64 0, i64 [[X]]
-; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ne i64 [[X]], 0
+; CHECK-NEXT: [[VAL:%.*]] = zext i1 [[TMP0]] to i32
; CHECK-NEXT: ret i32 [[VAL]]
;
entry:
@@ -57,8 +55,8 @@ define i32 @fold_arr3_alt(i64 %x) {
; CHECK-LABEL: define i32 @fold_arr3_alt(
; CHECK-SAME: i64 [[X:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [3 x i32], ptr @arr3_alt, i64 0, i64 [[X]]
-; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ne i64 [[X]], 1
+; CHECK-NEXT: [[VAL:%.*]] = zext i1 [[TMP0]] to i32
; CHECK-NEXT: ret i32 [[VAL]]
;
entry:
@@ -71,9 +69,7 @@ define i32 @fold_arr3_uniform(i64 %x) {
; CHECK-LABEL: define i32 @fold_arr3_uniform(
; CHECK-SAME: i64 [[X:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [3 x i32], ptr @arr3_uniform, i64 0, i64 [[X]]
-; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT: ret i32 [[VAL]]
+; CHECK-NEXT: ret i32 1
;
entry:
%arrayidx = getelementptr [3 x i32], ptr @arr3_uniform, i64 0, i64 %x
>From 555642ea5b3712394d54eb776e1ef479f38f2735 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Thu, 11 Jul 2024 01:09:11 +0800
Subject: [PATCH 3/6] [InstCombine] Skip undef values
---
.../InstCombine/InstCombineLoadStoreAlloca.cpp | 4 ++++
llvm/test/Transforms/InstCombine/load-global.ll | 15 +++++++++++++++
2 files changed, 19 insertions(+)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 2b5bd64cbe2e5..d2d3096b4c7d3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1036,6 +1036,10 @@ static Value *foldLoadFromIndexedGlobal(LoadInst &LI, IRBuilderBase &Builder) {
for (uint64_t I = 0; I < ArrayElementCount; ++I) {
Constant *Elt = Init->getAggregateElement(I);
+ // bail out if the array contains undef values
+ if (isa<UndefValue>(Elt))
+ return nullptr;
+
if (auto *It = ValueMap.find(Elt); It != ValueMap.end()) {
if (It->second == MultiMapIdx)
continue;
diff --git a/llvm/test/Transforms/InstCombine/load-global.ll b/llvm/test/Transforms/InstCombine/load-global.ll
index b8b4b05365c79..95ce560f0ded9 100644
--- a/llvm/test/Transforms/InstCombine/load-global.ll
+++ b/llvm/test/Transforms/InstCombine/load-global.ll
@@ -5,6 +5,7 @@
@arr2_mutable = global [4 x i32] [i32 0, i32 0, i32 1, i32 1], align 4
@arr2_external = external constant [4 x i32], align 4
@arr2_uniform = constant [2 x i32] [i32 1, i32 1], align 4
+ at arr2_undef = constant [2 x i32] [i32 1, i32 undef], align 4
@arr3 = constant [3 x i32] [i32 0, i32 1, i32 1], align 4
@arr3_alt = constant [3 x i32] [i32 1, i32 0, i32 1], align 4
@arr3_uniform = constant [3 x i32] [i32 1, i32 1, i32 1], align 4
@@ -202,3 +203,17 @@ entry:
%val = load i32, ptr %arrayidx, align 4
ret i32 %val
}
+
+define i32 @fold_arr2_undef(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr2_undef(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [2 x i32], ptr @arr2_undef, i64 0, i64 [[X]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [2 x i32], ptr @arr2_undef, i64 0, i64 %x
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
>From 6a6f6b284b8624a4df691b327614251483a10fa2 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Thu, 11 Jul 2024 06:46:23 +0800
Subject: [PATCH 4/6] [InstCombine] Address review comments. NFC.
---
.../InstCombineLoadStoreAlloca.cpp | 28 +++++++++++++------
1 file changed, 20 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index d2d3096b4c7d3..e5a0ac4e55370 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1003,18 +1003,21 @@ static Value *foldLoadFromIndexedGlobal(LoadInst &LI, IRBuilderBase &Builder) {
return nullptr;
auto *GEP = dyn_cast<GetElementPtrInst>(LI.getPointerOperand());
- if (!GEP || LI.getType() != GEP->getResultElementType())
+ if (!GEP)
return nullptr;
auto *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
- if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
- GV->getValueType() != GEP->getSourceElementType())
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
return nullptr;
Constant *Init = GV->getInitializer();
if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
return nullptr;
+ Type *EltTy = Init->getType()->getArrayElementType();
+ if (EltTy != LI.getType())
+ return nullptr;
+
uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
// Don't blow up on huge arrays.
// This threshold is chosen based on statistics on a dataset
@@ -1023,10 +1026,20 @@ static Value *foldLoadFromIndexedGlobal(LoadInst &LI, IRBuilderBase &Builder) {
if (ArrayElementCount > MaxArraySize)
return nullptr;
- // Require: GEP GV, 0, i
- if (GEP->getNumOperands() != 3 || !isa<ConstantInt>(GEP->getOperand(1)) ||
- !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
- isa<Constant>(GEP->getOperand(2)))
+ auto &DL = LI.getDataLayout();
+ uint64_t IndexBW = DL.getIndexTypeSizeInBits(GEP->getType());
+ APInt ConstOffset(IndexBW, 0);
+ MapVector<Value *, APInt> VariableOffsets;
+ if (!GEP->collectOffset(DL, IndexBW, VariableOffsets, ConstOffset))
+ return nullptr;
+
+ if (!ConstOffset.isZero() || VariableOffsets.size() != 1 ||
+ VariableOffsets.front().second !=
+ DL.getTypeAllocSize(EltTy).getFixedValue())
+ return nullptr;
+
+ Value *Index = VariableOffsets.front().first;
+ if (Index->getType()->getScalarSizeInBits() != IndexBW)
return nullptr;
SmallMapVector<Constant *, uint64_t, 2> ValueMap;
@@ -1078,7 +1091,6 @@ static Value *foldLoadFromIndexedGlobal(LoadInst &LI, IRBuilderBase &Builder) {
C = I2;
}
- Value *Index = GEP->getOperand(2);
return Builder.CreateSelect(
Builder.CreateICmp(ICmpInst::ICMP_EQ, Index,
ConstantInt::get(Index->getType(), C)),
>From 44d7d300c173186e5e2a634f21f79406c60fcb16 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Thu, 11 Jul 2024 13:51:08 +0800
Subject: [PATCH 5/6] [InstCombine] Add more tests. NFC.
---
llvm/test/Transforms/InstCombine/load-global.ll | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/load-global.ll b/llvm/test/Transforms/InstCombine/load-global.ll
index 95ce560f0ded9..e5732d33be817 100644
--- a/llvm/test/Transforms/InstCombine/load-global.ll
+++ b/llvm/test/Transforms/InstCombine/load-global.ll
@@ -78,6 +78,23 @@ entry:
ret i32 %val
}
+; TODO: Handle ptradd pattern
+define i32 @fold_arr2_i8(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr2_i8(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[OFFSET:%.*]] = shl nuw nsw i64 [[X]], 2
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i8, ptr @arr2, i64 [[OFFSET]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %offset = mul nsw nuw i64 %x, 4
+ %arrayidx = getelementptr i8, ptr @arr2, i64 %offset
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
; negative tests
define i32 @fold_arr2_mutable(i64 %x) {
>From b8f26a86626ebd4f03eaa25db33470a55763e343 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Thu, 11 Jul 2024 14:13:33 +0800
Subject: [PATCH 6/6] [InstCombine] Address review comments. NFC.
---
.../InstCombineLoadStoreAlloca.cpp | 2 +-
.../Transforms/InstCombine/load-global.ll | 29 ++++++++++---------
2 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index e5a0ac4e55370..0e644aa10031a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1053,7 +1053,7 @@ static Value *foldLoadFromIndexedGlobal(LoadInst &LI, IRBuilderBase &Builder) {
if (isa<UndefValue>(Elt))
return nullptr;
- if (auto *It = ValueMap.find(Elt); It != ValueMap.end()) {
+ if (auto It = ValueMap.find(Elt); It != ValueMap.end()) {
if (It->second == MultiMapIdx)
continue;
if (++MultiMapElts == 2)
diff --git a/llvm/test/Transforms/InstCombine/load-global.ll b/llvm/test/Transforms/InstCombine/load-global.ll
index e5732d33be817..f06aa5955c832 100644
--- a/llvm/test/Transforms/InstCombine/load-global.ll
+++ b/llvm/test/Transforms/InstCombine/load-global.ll
@@ -95,6 +95,21 @@ entry:
ret i32 %val
}
+; TODO: can be folded into x < 2 ? 0 : 1
+define i32 @fold_arr4_multimap(i64 %x) {
+; CHECK-LABEL: define i32 @fold_arr4_multimap(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [4 x i32], ptr @arr4_multimap, i64 0, i64 [[X]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %arrayidx = getelementptr [4 x i32], ptr @arr4_multimap, i64 0, i64 %x
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
; negative tests
define i32 @fold_arr2_mutable(i64 %x) {
@@ -207,20 +222,6 @@ entry:
ret i32 %val
}
-define i32 @fold_arr4_multimap(i64 %x) {
-; CHECK-LABEL: define i32 @fold_arr4_multimap(
-; CHECK-SAME: i64 [[X:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [4 x i32], ptr @arr4_multimap, i64 0, i64 [[X]]
-; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT: ret i32 [[VAL]]
-;
-entry:
- %arrayidx = getelementptr [4 x i32], ptr @arr4_multimap, i64 0, i64 %x
- %val = load i32, ptr %arrayidx, align 4
- ret i32 %val
-}
-
define i32 @fold_arr2_undef(i64 %x) {
; CHECK-LABEL: define i32 @fold_arr2_undef(
; CHECK-SAME: i64 [[X:%.*]]) {
More information about the llvm-commits
mailing list