[llvm] [CVP][LVI] Add support for InsertElementInst in LVI (PR #99368)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 17 11:35:13 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Rajat Bajpai (rajatbajpai)
<details>
<summary>Changes</summary>
Currently, the LVI analysis pass doesn't support the InsertElementInst vector instruction, so some optimization opportunities are missed. For instance, in the example below the ICMP instruction could be folded, but it isn't.
```
...
%ie1 = insertelement <2 x i32> poison, i32 10, i64 0
%ie2 = insertelement <2 x i32> %ie1, i32 20, i64 1
%icmp = icmp slt <2 x i32> %ie2, <i32 40, i32 40>
...
```
This change adds InsertElementInst support in the LVI analysis pass to fix the motivating example.
---
Full diff: https://github.com/llvm/llvm-project/pull/99368.diff
2 Files Affected:
- (modified) llvm/lib/Analysis/LazyValueInfo.cpp (+36)
- (added) llvm/test/Transforms/CorrelatedValuePropagation/insertelement.ll (+76)
``````````diff
diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index 92389f2896b8e..d28d4fa47fdae 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -428,6 +428,8 @@ class LazyValueInfoImpl {
std::optional<ValueLatticeElement> solveBlockValueIntrinsic(IntrinsicInst *II,
BasicBlock *BB);
std::optional<ValueLatticeElement>
+ solveBlockValueInsertElement(InsertElementInst *IEI, BasicBlock *BB);
+ std::optional<ValueLatticeElement>
solveBlockValueExtractValue(ExtractValueInst *EVI, BasicBlock *BB);
bool isNonNullAtEndOfBlock(Value *Val, BasicBlock *BB);
void intersectAssumeOrGuardBlockValueConstantRange(Value *Val,
@@ -657,6 +659,9 @@ LazyValueInfoImpl::solveBlockValueImpl(Value *Val, BasicBlock *BB) {
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI))
return solveBlockValueBinaryOp(BO, BB);
+ if (auto *IEI = dyn_cast<InsertElementInst>(BBI))
+ return solveBlockValueInsertElement(IEI, BB);
+
if (auto *EVI = dyn_cast<ExtractValueInst>(BBI))
return solveBlockValueExtractValue(EVI, BB);
@@ -1038,6 +1043,37 @@ LazyValueInfoImpl::solveBlockValueIntrinsic(IntrinsicInst *II, BasicBlock *BB) {
MetadataVal);
}
+std::optional<ValueLatticeElement>
+LazyValueInfoImpl::solveBlockValueInsertElement(InsertElementInst *IEI,
+ BasicBlock *BB) {
+ std::optional<ValueLatticeElement> OptEltVal =
+ getBlockValue(IEI->getOperand(1), BB, IEI);
+ if (!OptEltVal)
+ return std::nullopt;
+ ValueLatticeElement &EltVal = *OptEltVal;
+
+ if (auto *CV = dyn_cast<ConstantVector>(IEI->getOperand(0))) {
+ // Must be vector of integers. Merge these elements to create
+ // the range.
+ for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
+ Constant *Elem = CV->getAggregateElement(i);
+ if (isa<PoisonValue>(Elem))
+ continue;
+ std::optional<ConstantRange> CR = getRangeFor(Elem, IEI, BB);
+ if (!CR)
+ return std::nullopt;
+ EltVal.mergeIn(ValueLatticeElement::getRange(*CR));
+ }
+ } else if (!isa<PoisonValue>(IEI->getOperand(0))) {
+ std::optional<ValueLatticeElement> OptVecResult =
+ solveBlockValueImpl(IEI->getOperand(0), BB);
+ if (!OptVecResult)
+ return std::nullopt;
+ EltVal.mergeIn(*OptVecResult);
+ }
+ return EltVal;
+}
+
std::optional<ValueLatticeElement>
LazyValueInfoImpl::solveBlockValueExtractValue(ExtractValueInst *EVI,
BasicBlock *BB) {
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/insertelement.ll b/llvm/test/Transforms/CorrelatedValuePropagation/insertelement.ll
new file mode 100644
index 0000000000000..769f431738342
--- /dev/null
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/insertelement.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=correlated-propagation -S | FileCheck %s
+
+;; Check if ICMP instruction is constant folded or not.
+
+define void @test1(ptr addrspace(1) %out) {
+; CHECK-LABEL: define void @test1(
+; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) {
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range [[RNG0:![0-9]+]]
+; CHECK-NEXT: [[UDIV_LHS_TRUNC:%.*]] = trunc i32 [[CALL]] to i16
+; CHECK-NEXT: [[UDIV1:%.*]] = udiv i16 [[UDIV_LHS_TRUNC]], 5
+; CHECK-NEXT: [[UDIV_ZEXT:%.*]] = zext i16 [[UDIV1]] to i32
+; CHECK-NEXT: [[ADD1:%.*]] = add nuw nsw i32 [[UDIV_ZEXT]], 768
+; CHECK-NEXT: [[ADD2:%.*]] = add nuw nsw i32 [[UDIV_ZEXT]], 896
+; CHECK-NEXT: [[IE1:%.*]] = insertelement <2 x i32> poison, i32 [[ADD1]], i64 0
+; CHECK-NEXT: [[IE2:%.*]] = insertelement <2 x i32> [[IE1]], i32 [[ADD2]], i64 1
+; CHECK-NEXT: [[EI1:%.*]] = extractelement <2 x i1> <i1 true, i1 true>, i64 0
+; CHECK-NEXT: [[EI2:%.*]] = extractelement <2 x i1> <i1 true, i1 true>, i64 1
+; CHECK-NEXT: [[ADDUP:%.*]] = add i1 [[EI1]], [[EI2]]
+; CHECK-NEXT: [[ADDUP_UPCAST:%.*]] = zext i1 [[ADDUP]] to i32
+; CHECK-NEXT: store i32 [[ADDUP_UPCAST]], ptr addrspace(1) [[OUT]], align 4
+; CHECK-NEXT: ret void
+;
+ %call = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !1
+ %udiv = udiv i32 %call, 5
+ %add1 = add i32 %udiv, 768
+ %add2 = add i32 %udiv, 896
+ %ie1 = insertelement <2 x i32> poison, i32 %add1, i64 0
+ %ie2 = insertelement <2 x i32> %ie1, i32 %add2, i64 1
+ %icmp1 = icmp slt <2 x i32> %ie2, <i32 1024, i32 1024>
+ %ei1 = extractelement <2 x i1> %icmp1, i64 0
+ %ei2 = extractelement <2 x i1> %icmp1, i64 1
+ %addUp = add i1 %ei1, %ei2
+ %addUp.upcast = zext i1 %addUp to i32
+ store i32 %addUp.upcast, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+
+;; Check if LVI is able to handle constant vector operands
+;; in InsertElementInst and CVP is able to fold ICMP instruction.
+
+define void @test2(ptr addrspace(1) %out) {
+; CHECK-LABEL: define void @test2(
+; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) {
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range [[RNG0]]
+; CHECK-NEXT: [[UDIV_LHS_TRUNC:%.*]] = trunc i32 [[CALL]] to i16
+; CHECK-NEXT: [[UDIV1:%.*]] = udiv i16 [[UDIV_LHS_TRUNC]], 5
+; CHECK-NEXT: [[UDIV_ZEXT:%.*]] = zext i16 [[UDIV1]] to i32
+; CHECK-NEXT: [[ADD2:%.*]] = add nuw nsw i32 [[UDIV_ZEXT]], 896
+; CHECK-NEXT: [[IE1:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 [[ADD2]], i64 0
+; CHECK-NEXT: [[EI1:%.*]] = extractelement <2 x i1> <i1 true, i1 true>, i64 0
+; CHECK-NEXT: [[EI2:%.*]] = extractelement <2 x i1> <i1 true, i1 true>, i64 1
+; CHECK-NEXT: [[ADDUP:%.*]] = add i1 [[EI1]], [[EI2]]
+; CHECK-NEXT: [[ADDUP_UPCAST:%.*]] = zext i1 [[ADDUP]] to i32
+; CHECK-NEXT: store i32 [[ADDUP_UPCAST]], ptr addrspace(1) [[OUT]], align 4
+; CHECK-NEXT: ret void
+;
+ %call = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !1
+ %udiv = udiv i32 %call, 5
+ %add2 = add i32 %udiv, 896
+ %ie1 = insertelement <2 x i32> <i32 poison, i32 1>, i32 %add2, i64 0
+ %icmp1 = icmp slt <2 x i32> %ie1, <i32 1024, i32 1024>
+ %ei1 = extractelement <2 x i1> %icmp1, i64 0
+ %ei2 = extractelement <2 x i1> %icmp1, i64 1
+ %addUp = add i1 %ei1, %ei2
+ %addUp.upcast = zext i1 %addUp to i32
+ store i32 %addUp.upcast, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+
+!1 = !{i32 0, i32 640}
+;.
+; CHECK: [[RNG0]] = !{i32 0, i32 640}
+;.
``````````
</details>
https://github.com/llvm/llvm-project/pull/99368
More information about the llvm-commits
mailing list