[llvm] [InstCombine][foldPHIArgGEPIntoPHI] Early return for const vector index for gep inst. (PR #138661)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 7 02:09:34 PDT 2025
https://github.com/haonanya1 updated https://github.com/llvm/llvm-project/pull/138661
From 5569ace09f52e1fac1f35e628b778fc7ae9c1482 Mon Sep 17 00:00:00 2001
From: "Yang, Haonan" <haonan.yang at intel.com>
Date: Tue, 6 May 2025 11:05:15 +0200
Subject: [PATCH 1/3] [InstCombine][foldPHIArgGEPIntoPHI] Early return for
const vector index for gep inst.
---
.../Transforms/InstCombine/InstCombinePHI.cpp | 4 ++-
.../fold-phi-arg-gep-to-phi-negative.ll | 35 +++++++++++++++++++
2 files changed, 38 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 80308bf92dbbc..d29d5f21a3baa 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -575,7 +575,9 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
// variable index could pessimize the path. This also handles the case
// for struct indices, which must always be constant.
if (isa<ConstantInt>(FirstInst->getOperand(Op)) ||
- isa<ConstantInt>(GEP->getOperand(Op)))
+ isa<ConstantInt>(GEP->getOperand(Op)) ||
+ isa<ConstantDataVector>(FirstInst->getOperand(Op)) ||
+ isa<ConstantDataVector>(GEP->getOperand(Op)))
return nullptr;
if (FirstInst->getOperand(Op)->getType() !=
diff --git a/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll b/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
new file mode 100644
index 0000000000000..1716321c0b4e7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+%vec = type { %vec_base }
+%vec_base = type { [4 x float] }
+%foo = type { %vec, %vec}
+
+define void @test(i1 %tobool, ptr addrspace(1) %add.ptr.i) {
+entry:
+ %lane.0 = alloca %foo, align 16
+ %lane.15 = insertelement <16 x ptr> undef, ptr %lane.0, i64 0
+ %mm_vectorGEP = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i64> zeroinitializer, <16 x i32> splat (i32 1), <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
+ %mm_vectorGEP2 = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i64> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
+ br i1 %tobool, label %f1, label %f0
+
+f0:
+; CHECK: f0:
+; CHECK-NEXT: %mm_vectorGEP = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i64> zeroinitializer, <16 x i32> splat (i32 1), <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
+ br label %merge
+
+f1:
+; CHECK: f1:
+; CHECK-NEXT: %mm_vectorGEP2 = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i64> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
+ br label %merge
+
+merge:
+; CHECK: merge:
+; CHECK-NEXT: %vec.phi14 = phi <16 x ptr> [ %mm_vectorGEP, %f0 ], [ %mm_vectorGEP2, %f1 ]
+ %vec.phi14 = phi <16 x ptr> [ %mm_vectorGEP, %f0], [ %mm_vectorGEP2, %f1 ]
+ %wide.masked.gather15 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %vec.phi14, i32 4, <16 x i1> splat (i1 true), <16 x float> poison)
+ %wide.masked.gather15.extract.15. = extractelement <16 x float> %wide.masked.gather15, i32 15
+ store float %wide.masked.gather15.extract.15., ptr addrspace(1) %add.ptr.i, align 4
+ ret void
+}
+
+declare <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr>, i32 immarg, <16 x i1>, <16 x float>)
From 88606204d2fd6e63b9c28c5e0a6b7d8c89b2d42c Mon Sep 17 00:00:00 2001
From: "Yang, Haonan" <haonan.yang at intel.com>
Date: Wed, 7 May 2025 04:28:19 +0200
Subject: [PATCH 2/3] Apply suggestions
---
.../Transforms/InstCombine/InstCombinePHI.cpp | 6 ++--
.../fold-phi-arg-gep-to-phi-negative.ll | 36 +++++++++++--------
2 files changed, 24 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index d29d5f21a3baa..a842a5edcb8a3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -574,10 +574,8 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
// substantially cheaper to compute for the constants, so making it a
// variable index could pessimize the path. This also handles the case
// for struct indices, which must always be constant.
- if (isa<ConstantInt>(FirstInst->getOperand(Op)) ||
- isa<ConstantInt>(GEP->getOperand(Op)) ||
- isa<ConstantDataVector>(FirstInst->getOperand(Op)) ||
- isa<ConstantDataVector>(GEP->getOperand(Op)))
+ if (isa<Constant>(FirstInst->getOperand(Op)) ||
+ isa<Constant>(GEP->getOperand(Op)))
return nullptr;
if (FirstInst->getOperand(Op)->getType() !=
diff --git a/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll b/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
index 1716321c0b4e7..b570cd1d99d2a 100644
--- a/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
+++ b/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
@@ -1,35 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
%vec = type { %vec_base }
%vec_base = type { [4 x float] }
%foo = type { %vec, %vec}
-define void @test(i1 %tobool, ptr addrspace(1) %add.ptr.i) {
+define void @test(i1 %tobool, ptr addrspace(1) %add.ptr) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i1 [[TOBOOL:%.*]], ptr addrspace(1) [[ADD_PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[LANE_0:%.*]] = alloca [[FOO:%.*]], align 16
+; CHECK-NEXT: [[LANE_15:%.*]] = insertelement <16 x ptr> poison, ptr [[LANE_0]], i64 0
+; CHECK-NEXT: br i1 [[TOBOOL]], label %[[F1:.*]], label %[[F0:.*]]
+; CHECK: [[F0]]:
+; CHECK-NEXT: [[MM_VECTORGEP:%.*]] = getelementptr inbounds [[FOO]], <16 x ptr> [[LANE_15]], <16 x i64> zeroinitializer, <16 x i32> splat (i32 1), <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
+; CHECK-NEXT: br label %[[MERGE:.*]]
+; CHECK: [[F1]]:
+; CHECK-NEXT: [[MM_VECTORGEP2:%.*]] = getelementptr inbounds [[FOO]], <16 x ptr> [[LANE_15]], <16 x i64> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
+; CHECK-NEXT: br label %[[MERGE]]
+; CHECK: [[MERGE]]:
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x ptr> [ [[MM_VECTORGEP]], %[[F0]] ], [ [[MM_VECTORGEP2]], %[[F1]] ]
+; CHECK-NEXT: store <16 x ptr> [[VEC_PHI]], ptr addrspace(1) [[ADD_PTR]], align 128
+; CHECK-NEXT: ret void
+;
entry:
%lane.0 = alloca %foo, align 16
- %lane.15 = insertelement <16 x ptr> undef, ptr %lane.0, i64 0
+ %lane.15 = insertelement <16 x ptr> poison, ptr %lane.0, i64 0
%mm_vectorGEP = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i64> zeroinitializer, <16 x i32> splat (i32 1), <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
%mm_vectorGEP2 = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i64> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
br i1 %tobool, label %f1, label %f0
f0:
-; CHECK: f0:
-; CHECK-NEXT: %mm_vectorGEP = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i64> zeroinitializer, <16 x i32> splat (i32 1), <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
br label %merge
f1:
-; CHECK: f1:
-; CHECK-NEXT: %mm_vectorGEP2 = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i64> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
br label %merge
merge:
-; CHECK: merge:
-; CHECK-NEXT: %vec.phi14 = phi <16 x ptr> [ %mm_vectorGEP, %f0 ], [ %mm_vectorGEP2, %f1 ]
- %vec.phi14 = phi <16 x ptr> [ %mm_vectorGEP, %f0], [ %mm_vectorGEP2, %f1 ]
- %wide.masked.gather15 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %vec.phi14, i32 4, <16 x i1> splat (i1 true), <16 x float> poison)
- %wide.masked.gather15.extract.15. = extractelement <16 x float> %wide.masked.gather15, i32 15
- store float %wide.masked.gather15.extract.15., ptr addrspace(1) %add.ptr.i, align 4
+ %vec.phi = phi <16 x ptr> [ %mm_vectorGEP, %f0], [ %mm_vectorGEP2, %f1 ]
+ store <16 x ptr> %vec.phi, ptr addrspace(1) %add.ptr
ret void
}
-
-declare <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr>, i32 immarg, <16 x i1>, <16 x float>)
From abb656492897f5fba2b92c654f98196689895236 Mon Sep 17 00:00:00 2001
From: "Yang, Haonan" <haonan.yang at intel.com>
Date: Wed, 7 May 2025 11:09:17 +0200
Subject: [PATCH 3/3] Update lit.
---
.../fold-phi-arg-gep-to-phi-negative.ll | 25 ++++++++-----------
1 file changed, 10 insertions(+), 15 deletions(-)
diff --git a/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll b/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
index b570cd1d99d2a..4ff5299af9cbe 100644
--- a/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
+++ b/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
@@ -1,33 +1,29 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
-%vec = type { %vec_base }
-%vec_base = type { [4 x float] }
-%foo = type { %vec, %vec}
+%foo = type { i16, i16 }
-define void @test(i1 %tobool, ptr addrspace(1) %add.ptr) {
-; CHECK-LABEL: define void @test(
-; CHECK-SAME: i1 [[TOBOOL:%.*]], ptr addrspace(1) [[ADD_PTR:%.*]]) {
+define <16 x ptr> @test(i1 %tobool) {
+; CHECK-LABEL: define <16 x ptr> @test(
+; CHECK-SAME: i1 [[TOBOOL:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[LANE_0:%.*]] = alloca [[FOO:%.*]], align 16
; CHECK-NEXT: [[LANE_15:%.*]] = insertelement <16 x ptr> poison, ptr [[LANE_0]], i64 0
; CHECK-NEXT: br i1 [[TOBOOL]], label %[[F1:.*]], label %[[F0:.*]]
; CHECK: [[F0]]:
-; CHECK-NEXT: [[MM_VECTORGEP:%.*]] = getelementptr inbounds [[FOO]], <16 x ptr> [[LANE_15]], <16 x i64> zeroinitializer, <16 x i32> splat (i32 1), <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
+; CHECK-NEXT: [[MM_VECTORGEP:%.*]] = getelementptr inbounds [[FOO]], <16 x ptr> [[LANE_15]], <16 x i64> splat (i64 1)
; CHECK-NEXT: br label %[[MERGE:.*]]
; CHECK: [[F1]]:
-; CHECK-NEXT: [[MM_VECTORGEP2:%.*]] = getelementptr inbounds [[FOO]], <16 x ptr> [[LANE_15]], <16 x i64> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
; CHECK-NEXT: br label %[[MERGE]]
; CHECK: [[MERGE]]:
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x ptr> [ [[MM_VECTORGEP]], %[[F0]] ], [ [[MM_VECTORGEP2]], %[[F1]] ]
-; CHECK-NEXT: store <16 x ptr> [[VEC_PHI]], ptr addrspace(1) [[ADD_PTR]], align 128
-; CHECK-NEXT: ret void
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x ptr> [ [[MM_VECTORGEP]], %[[F0]] ], [ [[LANE_15]], %[[F1]] ]
+; CHECK-NEXT: ret <16 x ptr> [[VEC_PHI]]
;
entry:
%lane.0 = alloca %foo, align 16
%lane.15 = insertelement <16 x ptr> poison, ptr %lane.0, i64 0
- %mm_vectorGEP = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i64> zeroinitializer, <16 x i32> splat (i32 1), <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
- %mm_vectorGEP2 = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i64> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
+ %mm_vectorGEP = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i32> splat (i32 1)
+ %mm_vectorGEP2 = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i32> zeroinitializer
br i1 %tobool, label %f1, label %f0
f0:
@@ -38,6 +34,5 @@ f1:
merge:
%vec.phi = phi <16 x ptr> [ %mm_vectorGEP, %f0], [ %mm_vectorGEP2, %f1 ]
- store <16 x ptr> %vec.phi, ptr addrspace(1) %add.ptr
- ret void
+ ret <16 x ptr> %vec.phi
}
More information about the llvm-commits mailing list