[llvm] [CVP][LVI] Add support for InsertElementInst in LVI (PR #99368)
Rajat Bajpai via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 18 02:04:53 PDT 2024
https://github.com/rajatbajpai updated https://github.com/llvm/llvm-project/pull/99368
>From 7166ccd4b97598d97e5742b9331d291b83a2f48c Mon Sep 17 00:00:00 2001
From: rbajpai <rbajpai at nvidia.com>
Date: Fri, 5 Jul 2024 12:05:34 +0530
Subject: [PATCH 1/2] Currently, the LVI analysis pass doesn't support the
InsertElementInst vector instruction. Due to this, some optimization
opportunities are missed. For example, in the example below, the ICMP
instruction could be folded, but it isn't.
```
...
%ie1 = insertelement <2 x i32> poison, i32 10, i64 0
%ie2 = insertelement <2 x i32> %ie1, i32 20, i64 1
%icmp = icmp slt <2 x i32> %ie2, <i32 40, i32 40>
...
```
This change adds InsertElementInst support in the LVI analysis pass to fix the motivating
example.
---
llvm/lib/Analysis/LazyValueInfo.cpp | 36 +++++++++
.../insertelement.ll | 76 +++++++++++++++++++
2 files changed, 112 insertions(+)
create mode 100644 llvm/test/Transforms/CorrelatedValuePropagation/insertelement.ll
diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index 92389f2896b8e..d28d4fa47fdae 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -428,6 +428,8 @@ class LazyValueInfoImpl {
std::optional<ValueLatticeElement> solveBlockValueIntrinsic(IntrinsicInst *II,
BasicBlock *BB);
std::optional<ValueLatticeElement>
+ solveBlockValueInsertElement(InsertElementInst *IEI, BasicBlock *BB);
+ std::optional<ValueLatticeElement>
solveBlockValueExtractValue(ExtractValueInst *EVI, BasicBlock *BB);
bool isNonNullAtEndOfBlock(Value *Val, BasicBlock *BB);
void intersectAssumeOrGuardBlockValueConstantRange(Value *Val,
@@ -657,6 +659,9 @@ LazyValueInfoImpl::solveBlockValueImpl(Value *Val, BasicBlock *BB) {
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI))
return solveBlockValueBinaryOp(BO, BB);
+ if (auto *IEI = dyn_cast<InsertElementInst>(BBI))
+ return solveBlockValueInsertElement(IEI, BB);
+
if (auto *EVI = dyn_cast<ExtractValueInst>(BBI))
return solveBlockValueExtractValue(EVI, BB);
@@ -1038,6 +1043,37 @@ LazyValueInfoImpl::solveBlockValueIntrinsic(IntrinsicInst *II, BasicBlock *BB) {
MetadataVal);
}
+std::optional<ValueLatticeElement>
+LazyValueInfoImpl::solveBlockValueInsertElement(InsertElementInst *IEI,
+ BasicBlock *BB) {
+ std::optional<ValueLatticeElement> OptEltVal =
+ getBlockValue(IEI->getOperand(1), BB, IEI);
+ if (!OptEltVal)
+ return std::nullopt;
+ ValueLatticeElement &EltVal = *OptEltVal;
+
+ if (auto *CV = dyn_cast<ConstantVector>(IEI->getOperand(0))) {
+ // Must be vector of integers. Merge these elements to create
+ // the range.
+ for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
+ Constant *Elem = CV->getAggregateElement(i);
+ if (isa<PoisonValue>(Elem))
+ continue;
+ std::optional<ConstantRange> CR = getRangeFor(Elem, IEI, BB);
+ if (!CR)
+ return std::nullopt;
+ EltVal.mergeIn(ValueLatticeElement::getRange(*CR));
+ }
+ } else if (!isa<PoisonValue>(IEI->getOperand(0))) {
+ std::optional<ValueLatticeElement> OptVecResult =
+ solveBlockValueImpl(IEI->getOperand(0), BB);
+ if (!OptVecResult)
+ return std::nullopt;
+ EltVal.mergeIn(*OptVecResult);
+ }
+ return EltVal;
+}
+
std::optional<ValueLatticeElement>
LazyValueInfoImpl::solveBlockValueExtractValue(ExtractValueInst *EVI,
BasicBlock *BB) {
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/insertelement.ll b/llvm/test/Transforms/CorrelatedValuePropagation/insertelement.ll
new file mode 100644
index 0000000000000..769f431738342
--- /dev/null
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/insertelement.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=correlated-propagation -S | FileCheck %s
+
+;; Check if ICMP instruction is constant folded or not.
+
+define void @test1(ptr addrspace(1) %out) {
+; CHECK-LABEL: define void @test1(
+; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) {
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range [[RNG0:![0-9]+]]
+; CHECK-NEXT: [[UDIV_LHS_TRUNC:%.*]] = trunc i32 [[CALL]] to i16
+; CHECK-NEXT: [[UDIV1:%.*]] = udiv i16 [[UDIV_LHS_TRUNC]], 5
+; CHECK-NEXT: [[UDIV_ZEXT:%.*]] = zext i16 [[UDIV1]] to i32
+; CHECK-NEXT: [[ADD1:%.*]] = add nuw nsw i32 [[UDIV_ZEXT]], 768
+; CHECK-NEXT: [[ADD2:%.*]] = add nuw nsw i32 [[UDIV_ZEXT]], 896
+; CHECK-NEXT: [[IE1:%.*]] = insertelement <2 x i32> poison, i32 [[ADD1]], i64 0
+; CHECK-NEXT: [[IE2:%.*]] = insertelement <2 x i32> [[IE1]], i32 [[ADD2]], i64 1
+; CHECK-NEXT: [[EI1:%.*]] = extractelement <2 x i1> <i1 true, i1 true>, i64 0
+; CHECK-NEXT: [[EI2:%.*]] = extractelement <2 x i1> <i1 true, i1 true>, i64 1
+; CHECK-NEXT: [[ADDUP:%.*]] = add i1 [[EI1]], [[EI2]]
+; CHECK-NEXT: [[ADDUP_UPCAST:%.*]] = zext i1 [[ADDUP]] to i32
+; CHECK-NEXT: store i32 [[ADDUP_UPCAST]], ptr addrspace(1) [[OUT]], align 4
+; CHECK-NEXT: ret void
+;
+ %call = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !1
+ %udiv = udiv i32 %call, 5
+ %add1 = add i32 %udiv, 768
+ %add2 = add i32 %udiv, 896
+ %ie1 = insertelement <2 x i32> poison, i32 %add1, i64 0
+ %ie2 = insertelement <2 x i32> %ie1, i32 %add2, i64 1
+ %icmp1 = icmp slt <2 x i32> %ie2, <i32 1024, i32 1024>
+ %ei1 = extractelement <2 x i1> %icmp1, i64 0
+ %ei2 = extractelement <2 x i1> %icmp1, i64 1
+ %addUp = add i1 %ei1, %ei2
+ %addUp.upcast = zext i1 %addUp to i32
+ store i32 %addUp.upcast, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+
+;; Check if LVI is able to handle constant vector operands
+;; in InsertElementInst and CVP is able to fold ICMP instruction.
+
+define void @test2(ptr addrspace(1) %out) {
+; CHECK-LABEL: define void @test2(
+; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) {
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range [[RNG0]]
+; CHECK-NEXT: [[UDIV_LHS_TRUNC:%.*]] = trunc i32 [[CALL]] to i16
+; CHECK-NEXT: [[UDIV1:%.*]] = udiv i16 [[UDIV_LHS_TRUNC]], 5
+; CHECK-NEXT: [[UDIV_ZEXT:%.*]] = zext i16 [[UDIV1]] to i32
+; CHECK-NEXT: [[ADD2:%.*]] = add nuw nsw i32 [[UDIV_ZEXT]], 896
+; CHECK-NEXT: [[IE1:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 [[ADD2]], i64 0
+; CHECK-NEXT: [[EI1:%.*]] = extractelement <2 x i1> <i1 true, i1 true>, i64 0
+; CHECK-NEXT: [[EI2:%.*]] = extractelement <2 x i1> <i1 true, i1 true>, i64 1
+; CHECK-NEXT: [[ADDUP:%.*]] = add i1 [[EI1]], [[EI2]]
+; CHECK-NEXT: [[ADDUP_UPCAST:%.*]] = zext i1 [[ADDUP]] to i32
+; CHECK-NEXT: store i32 [[ADDUP_UPCAST]], ptr addrspace(1) [[OUT]], align 4
+; CHECK-NEXT: ret void
+;
+ %call = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !1
+ %udiv = udiv i32 %call, 5
+ %add2 = add i32 %udiv, 896
+ %ie1 = insertelement <2 x i32> <i32 poison, i32 1>, i32 %add2, i64 0
+ %icmp1 = icmp slt <2 x i32> %ie1, <i32 1024, i32 1024>
+ %ei1 = extractelement <2 x i1> %icmp1, i64 0
+ %ei2 = extractelement <2 x i1> %icmp1, i64 1
+ %addUp = add i1 %ei1, %ei2
+ %addUp.upcast = zext i1 %addUp to i32
+ store i32 %addUp.upcast, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+
+!1 = !{i32 0, i32 640}
+;.
+; CHECK: [[RNG0]] = !{i32 0, i32 640}
+;.
>From 4e223c3ef1b75e09c563295d8f66739a9b4c04ee Mon Sep 17 00:00:00 2001
From: rbajpai <rbajpai at nvidia.com>
Date: Thu, 18 Jul 2024 13:54:32 +0530
Subject: [PATCH 2/2] Addressed review comments. Moved `ConstantVector` logic
inside `getBlockValue` function.
---
llvm/lib/Analysis/LazyValueInfo.cpp | 48 ++++++------
.../insertelement.ll | 76 -------------------
.../CorrelatedValuePropagation/vectors.ll | 25 ++++++
3 files changed, 51 insertions(+), 98 deletions(-)
delete mode 100644 llvm/test/Transforms/CorrelatedValuePropagation/insertelement.ll
diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index d28d4fa47fdae..8a14fae13327f 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -572,6 +572,23 @@ void LazyValueInfoImpl::solve() {
std::optional<ValueLatticeElement>
LazyValueInfoImpl::getBlockValue(Value *Val, BasicBlock *BB,
Instruction *CxtI) {
+ if (auto *CV = dyn_cast<ConstantVector>(Val)) {
+ // Must be vector of integers. Merge these elements to create
+ // the range.
+ ValueLatticeElement Res;
+ for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
+ Constant *Elem = CV->getAggregateElement(i);
+ if (isa<PoisonValue>(Elem))
+ continue;
+ std::optional<ValueLatticeElement> ElemVal =
+ getBlockValue(Elem, BB, CxtI);
+ if (!ElemVal)
+ return std::nullopt;
+ Res.mergeIn(*ElemVal);
+ }
+ return Res;
+ }
+
// If already a constant, there is nothing to compute.
if (Constant *VC = dyn_cast<Constant>(Val))
return ValueLatticeElement::get(VC);
@@ -1045,33 +1062,20 @@ LazyValueInfoImpl::solveBlockValueIntrinsic(IntrinsicInst *II, BasicBlock *BB) {
std::optional<ValueLatticeElement>
LazyValueInfoImpl::solveBlockValueInsertElement(InsertElementInst *IEI,
- BasicBlock *BB) {
+ BasicBlock *BB) {
std::optional<ValueLatticeElement> OptEltVal =
getBlockValue(IEI->getOperand(1), BB, IEI);
if (!OptEltVal)
return std::nullopt;
- ValueLatticeElement &EltVal = *OptEltVal;
+ ValueLatticeElement &Res = *OptEltVal;
- if (auto *CV = dyn_cast<ConstantVector>(IEI->getOperand(0))) {
- // Must be vector of integers. Merge these elements to create
- // the range.
- for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
- Constant *Elem = CV->getAggregateElement(i);
- if (isa<PoisonValue>(Elem))
- continue;
- std::optional<ConstantRange> CR = getRangeFor(Elem, IEI, BB);
- if (!CR)
- return std::nullopt;
- EltVal.mergeIn(ValueLatticeElement::getRange(*CR));
- }
- } else if (!isa<PoisonValue>(IEI->getOperand(0))) {
- std::optional<ValueLatticeElement> OptVecResult =
- solveBlockValueImpl(IEI->getOperand(0), BB);
- if (!OptVecResult)
- return std::nullopt;
- EltVal.mergeIn(*OptVecResult);
- }
- return EltVal;
+ std::optional<ValueLatticeElement> OptVecVal =
+ getBlockValue(IEI->getOperand(0), BB, IEI);
+ if (!OptVecVal)
+ return std::nullopt;
+
+ Res.mergeIn(*OptVecVal);
+ return Res;
}
std::optional<ValueLatticeElement>
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/insertelement.ll b/llvm/test/Transforms/CorrelatedValuePropagation/insertelement.ll
deleted file mode 100644
index 769f431738342..0000000000000
--- a/llvm/test/Transforms/CorrelatedValuePropagation/insertelement.ll
+++ /dev/null
@@ -1,76 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt < %s -passes=correlated-propagation -S | FileCheck %s
-
-;; Check if ICMP instruction is constant folded or not.
-
-define void @test1(ptr addrspace(1) %out) {
-; CHECK-LABEL: define void @test1(
-; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) {
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range [[RNG0:![0-9]+]]
-; CHECK-NEXT: [[UDIV_LHS_TRUNC:%.*]] = trunc i32 [[CALL]] to i16
-; CHECK-NEXT: [[UDIV1:%.*]] = udiv i16 [[UDIV_LHS_TRUNC]], 5
-; CHECK-NEXT: [[UDIV_ZEXT:%.*]] = zext i16 [[UDIV1]] to i32
-; CHECK-NEXT: [[ADD1:%.*]] = add nuw nsw i32 [[UDIV_ZEXT]], 768
-; CHECK-NEXT: [[ADD2:%.*]] = add nuw nsw i32 [[UDIV_ZEXT]], 896
-; CHECK-NEXT: [[IE1:%.*]] = insertelement <2 x i32> poison, i32 [[ADD1]], i64 0
-; CHECK-NEXT: [[IE2:%.*]] = insertelement <2 x i32> [[IE1]], i32 [[ADD2]], i64 1
-; CHECK-NEXT: [[EI1:%.*]] = extractelement <2 x i1> <i1 true, i1 true>, i64 0
-; CHECK-NEXT: [[EI2:%.*]] = extractelement <2 x i1> <i1 true, i1 true>, i64 1
-; CHECK-NEXT: [[ADDUP:%.*]] = add i1 [[EI1]], [[EI2]]
-; CHECK-NEXT: [[ADDUP_UPCAST:%.*]] = zext i1 [[ADDUP]] to i32
-; CHECK-NEXT: store i32 [[ADDUP_UPCAST]], ptr addrspace(1) [[OUT]], align 4
-; CHECK-NEXT: ret void
-;
- %call = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !1
- %udiv = udiv i32 %call, 5
- %add1 = add i32 %udiv, 768
- %add2 = add i32 %udiv, 896
- %ie1 = insertelement <2 x i32> poison, i32 %add1, i64 0
- %ie2 = insertelement <2 x i32> %ie1, i32 %add2, i64 1
- %icmp1 = icmp slt <2 x i32> %ie2, <i32 1024, i32 1024>
- %ei1 = extractelement <2 x i1> %icmp1, i64 0
- %ei2 = extractelement <2 x i1> %icmp1, i64 1
- %addUp = add i1 %ei1, %ei2
- %addUp.upcast = zext i1 %addUp to i32
- store i32 %addUp.upcast, ptr addrspace(1) %out, align 4
- ret void
-}
-
-
-;; Check if LVI is able to handle constant vector operands
-;; in InsertElementInst and CVP is able to fold ICMP instruction.
-
-define void @test2(ptr addrspace(1) %out) {
-; CHECK-LABEL: define void @test2(
-; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) {
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range [[RNG0]]
-; CHECK-NEXT: [[UDIV_LHS_TRUNC:%.*]] = trunc i32 [[CALL]] to i16
-; CHECK-NEXT: [[UDIV1:%.*]] = udiv i16 [[UDIV_LHS_TRUNC]], 5
-; CHECK-NEXT: [[UDIV_ZEXT:%.*]] = zext i16 [[UDIV1]] to i32
-; CHECK-NEXT: [[ADD2:%.*]] = add nuw nsw i32 [[UDIV_ZEXT]], 896
-; CHECK-NEXT: [[IE1:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 [[ADD2]], i64 0
-; CHECK-NEXT: [[EI1:%.*]] = extractelement <2 x i1> <i1 true, i1 true>, i64 0
-; CHECK-NEXT: [[EI2:%.*]] = extractelement <2 x i1> <i1 true, i1 true>, i64 1
-; CHECK-NEXT: [[ADDUP:%.*]] = add i1 [[EI1]], [[EI2]]
-; CHECK-NEXT: [[ADDUP_UPCAST:%.*]] = zext i1 [[ADDUP]] to i32
-; CHECK-NEXT: store i32 [[ADDUP_UPCAST]], ptr addrspace(1) [[OUT]], align 4
-; CHECK-NEXT: ret void
-;
- %call = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !1
- %udiv = udiv i32 %call, 5
- %add2 = add i32 %udiv, 896
- %ie1 = insertelement <2 x i32> <i32 poison, i32 1>, i32 %add2, i64 0
- %icmp1 = icmp slt <2 x i32> %ie1, <i32 1024, i32 1024>
- %ei1 = extractelement <2 x i1> %icmp1, i64 0
- %ei2 = extractelement <2 x i1> %icmp1, i64 1
- %addUp = add i1 %ei1, %ei2
- %addUp.upcast = zext i1 %addUp to i32
- store i32 %addUp.upcast, ptr addrspace(1) %out, align 4
- ret void
-}
-
-
-!1 = !{i32 0, i32 640}
-;.
-; CHECK: [[RNG0]] = !{i32 0, i32 640}
-;.
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll b/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll
index 43e680cd25cdb..079946d1dbb40 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll
@@ -327,3 +327,28 @@ join:
%add = add <2 x i16> %phi, <i16 2, i16 3>
ret <2 x i16> %add
}
+
+;; Check if ICMP instruction is constant folded or not.
+define <2 x i1> @insertelement_fold1() {
+; CHECK-LABEL: define <2 x i1> @insertelement_fold1() {
+; CHECK-NEXT: [[IE1:%.*]] = insertelement <2 x i32> poison, i32 10, i64 0
+; CHECK-NEXT: [[IE2:%.*]] = insertelement <2 x i32> [[IE1]], i32 20, i64 1
+; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true>
+;
+ %ie1 = insertelement <2 x i32> poison, i32 10, i64 0
+ %ie2 = insertelement <2 x i32> %ie1, i32 20, i64 1
+ %icmp1 = icmp slt <2 x i32> %ie2, <i32 1024, i32 1024>
+ ret <2 x i1> %icmp1
+}
+
+;; Check if LVI is able to handle constant vector operands
+;; in InsertElementInst and CVP is able to fold ICMP instruction.
+define <2 x i1> @insertelement_fold2() {
+; CHECK-LABEL: define <2 x i1> @insertelement_fold2() {
+; CHECK-NEXT: [[IE1:%.*]] = insertelement <2 x i32> <i32 poison, i32 20>, i32 10, i64 0
+; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true>
+;
+ %ie1 = insertelement <2 x i32> <i32 poison, i32 20>, i32 10, i64 0
+ %icmp1 = icmp slt <2 x i32> %ie1, <i32 1024, i32 1024>
+ ret <2 x i1> %icmp1
+}
More information about the llvm-commits
mailing list