[llvm] [CVP][LVI] Add support for InsertElementInst in LVI (PR #99368)
Rajat Bajpai via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 18 10:04:26 PDT 2024
https://github.com/rajatbajpai updated https://github.com/llvm/llvm-project/pull/99368
>From 0dfc0f9cbfd93b96a8eabb56ed62715ebf7dc754 Mon Sep 17 00:00:00 2001
From: rbajpai <rbajpai at nvidia.com>
Date: Fri, 5 Jul 2024 12:05:34 +0530
Subject: [PATCH] [CVP][LVI] Add support for InsertElementInst in LVI
Currently, the LVI analysis pass doesn't support the InsertElementInst vector instruction.
Due to this, some optimization opportunities are missed. For instance, in the example below,
the ICMP instruction could be folded, but currently it is not.
```
...
%ie1 = insertelement <2 x i32> poison, i32 10, i64 0
%ie2 = insertelement <2 x i32> %ie1, i32 20, i64 1
%icmp = icmp slt <2 x i32> %ie2, <i32 40, i32 40>
...
```
This change adds InsertElementInst support in the LVI analysis pass to fix the motivating
example.
---
llvm/lib/Analysis/LazyValueInfo.cpp | 23 +++++++++++++++++
.../CorrelatedValuePropagation/vectors.ll | 25 +++++++++++++++++++
2 files changed, 48 insertions(+)
diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index 92389f2896b8e..4c023ed5ed8f0 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -428,6 +428,8 @@ class LazyValueInfoImpl {
std::optional<ValueLatticeElement> solveBlockValueIntrinsic(IntrinsicInst *II,
BasicBlock *BB);
std::optional<ValueLatticeElement>
+ solveBlockValueInsertElement(InsertElementInst *IEI, BasicBlock *BB);
+ std::optional<ValueLatticeElement>
solveBlockValueExtractValue(ExtractValueInst *EVI, BasicBlock *BB);
bool isNonNullAtEndOfBlock(Value *Val, BasicBlock *BB);
void intersectAssumeOrGuardBlockValueConstantRange(Value *Val,
@@ -657,6 +659,9 @@ LazyValueInfoImpl::solveBlockValueImpl(Value *Val, BasicBlock *BB) {
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI))
return solveBlockValueBinaryOp(BO, BB);
+ if (auto *IEI = dyn_cast<InsertElementInst>(BBI))
+ return solveBlockValueInsertElement(IEI, BB);
+
if (auto *EVI = dyn_cast<ExtractValueInst>(BBI))
return solveBlockValueExtractValue(EVI, BB);
@@ -1038,6 +1043,24 @@ LazyValueInfoImpl::solveBlockValueIntrinsic(IntrinsicInst *II, BasicBlock *BB) {
MetadataVal);
}
+std::optional<ValueLatticeElement>
+LazyValueInfoImpl::solveBlockValueInsertElement(InsertElementInst *IEI,
+ BasicBlock *BB) {
+ std::optional<ValueLatticeElement> OptEltVal =
+ getBlockValue(IEI->getOperand(1), BB, IEI);
+ if (!OptEltVal)
+ return std::nullopt;
+ ValueLatticeElement &Res = *OptEltVal;
+
+ std::optional<ValueLatticeElement> OptVecVal =
+ getBlockValue(IEI->getOperand(0), BB, IEI);
+ if (!OptVecVal)
+ return std::nullopt;
+
+ Res.mergeIn(*OptVecVal);
+ return Res;
+}
+
std::optional<ValueLatticeElement>
LazyValueInfoImpl::solveBlockValueExtractValue(ExtractValueInst *EVI,
BasicBlock *BB) {
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll b/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll
index 6254b54d42554..351a2c79cdff4 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll
@@ -327,3 +327,28 @@ join:
%add = add <2 x i16> %phi, <i16 2, i16 3>
ret <2 x i16> %add
}
+
+;; Check if ICMP instruction is constant folded or not.
+define <2 x i1> @insertelement_fold1() {
+; CHECK-LABEL: define <2 x i1> @insertelement_fold1() {
+; CHECK-NEXT: [[IE1:%.*]] = insertelement <2 x i32> poison, i32 10, i64 0
+; CHECK-NEXT: [[IE2:%.*]] = insertelement <2 x i32> [[IE1]], i32 20, i64 1
+; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true>
+;
+ %ie1 = insertelement <2 x i32> poison, i32 10, i64 0
+ %ie2 = insertelement <2 x i32> %ie1, i32 20, i64 1
+ %icmp1 = icmp slt <2 x i32> %ie2, <i32 1024, i32 1024>
+ ret <2 x i1> %icmp1
+}
+
+;; Check if LVI is able to handle constant vector operands
+;; in InsertElementInst and CVP is able to fold ICMP instruction.
+define <2 x i1> @insertelement_fold2() {
+; CHECK-LABEL: define <2 x i1> @insertelement_fold2() {
+; CHECK-NEXT: [[IE1:%.*]] = insertelement <2 x i32> <i32 poison, i32 20>, i32 10, i64 0
+; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true>
+;
+ %ie1 = insertelement <2 x i32> <i32 poison, i32 20>, i32 10, i64 0
+ %icmp1 = icmp slt <2 x i32> %ie1, <i32 1024, i32 1024>
+ ret <2 x i1> %icmp1
+}
More information about the llvm-commits
mailing list