[llvm] [InstCombine][foldPHIArgGEPIntoPHI] Early return for constant vector indices of GEP instructions. (PR #138661)

via llvm-commits llvm-commits at lists.llvm.org
Wed May 7 02:09:34 PDT 2025


https://github.com/haonanya1 updated https://github.com/llvm/llvm-project/pull/138661

>From 5569ace09f52e1fac1f35e628b778fc7ae9c1482 Mon Sep 17 00:00:00 2001
From: "Yang, Haonan" <haonan.yang at intel.com>
Date: Tue, 6 May 2025 11:05:15 +0200
Subject: [PATCH 1/3] [InstCombine][foldPHIArgGEPIntoPHI] Early return for
 constant vector indices of GEP instructions.

---
 .../Transforms/InstCombine/InstCombinePHI.cpp |  4 ++-
 .../fold-phi-arg-gep-to-phi-negative.ll       | 35 +++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll

diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 80308bf92dbbc..d29d5f21a3baa 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -575,7 +575,9 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
       // variable index could pessimize the path.  This also handles the case
       // for struct indices, which must always be constant.
       if (isa<ConstantInt>(FirstInst->getOperand(Op)) ||
-          isa<ConstantInt>(GEP->getOperand(Op)))
+          isa<ConstantInt>(GEP->getOperand(Op)) ||
+          isa<ConstantDataVector>(FirstInst->getOperand(Op)) ||
+          isa<ConstantDataVector>(GEP->getOperand(Op)))
         return nullptr;
 
       if (FirstInst->getOperand(Op)->getType() !=
diff --git a/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll b/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
new file mode 100644
index 0000000000000..1716321c0b4e7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+%vec = type { %vec_base }
+%vec_base = type { [4 x float] }
+%foo = type { %vec, %vec}
+
+define void @test(i1 %tobool, ptr addrspace(1) %add.ptr.i) {
+entry:
+  %lane.0 = alloca %foo, align 16
+  %lane.15 = insertelement <16 x ptr> undef, ptr %lane.0, i64 0
+  %mm_vectorGEP = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i64> zeroinitializer, <16 x i32> splat (i32 1), <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
+  %mm_vectorGEP2 = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i64> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
+  br i1 %tobool, label %f1, label %f0
+
+f0:
+; CHECK: f0:
+; CHECK-NEXT: %mm_vectorGEP = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i64> zeroinitializer, <16 x i32> splat (i32 1), <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
+  br label %merge
+
+f1:
+; CHECK: f1:
+; CHECK-NEXT: %mm_vectorGEP2 = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i64> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
+  br label %merge
+
+merge:
+; CHECK: merge:
+; CHECK-NEXT: %vec.phi14 = phi <16 x ptr> [ %mm_vectorGEP, %f0 ], [ %mm_vectorGEP2, %f1 ]
+  %vec.phi14 = phi <16 x ptr> [ %mm_vectorGEP, %f0], [ %mm_vectorGEP2, %f1 ]
+  %wide.masked.gather15 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %vec.phi14, i32 4, <16 x i1> splat (i1 true), <16 x float> poison)
+  %wide.masked.gather15.extract.15. = extractelement <16 x float> %wide.masked.gather15, i32 15
+  store float %wide.masked.gather15.extract.15., ptr addrspace(1) %add.ptr.i, align 4
+  ret void
+}
+
+declare <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr>, i32 immarg, <16 x i1>, <16 x float>)

>From 88606204d2fd6e63b9c28c5e0a6b7d8c89b2d42c Mon Sep 17 00:00:00 2001
From: "Yang, Haonan" <haonan.yang at intel.com>
Date: Wed, 7 May 2025 04:28:19 +0200
Subject: [PATCH 2/3] Apply suggestions

---
 .../Transforms/InstCombine/InstCombinePHI.cpp |  6 ++--
 .../fold-phi-arg-gep-to-phi-negative.ll       | 36 +++++++++++--------
 2 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index d29d5f21a3baa..a842a5edcb8a3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -574,10 +574,8 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
       // substantially cheaper to compute for the constants, so making it a
       // variable index could pessimize the path.  This also handles the case
       // for struct indices, which must always be constant.
-      if (isa<ConstantInt>(FirstInst->getOperand(Op)) ||
-          isa<ConstantInt>(GEP->getOperand(Op)) ||
-          isa<ConstantDataVector>(FirstInst->getOperand(Op)) ||
-          isa<ConstantDataVector>(GEP->getOperand(Op)))
+      if (isa<Constant>(FirstInst->getOperand(Op)) ||
+          isa<Constant>(GEP->getOperand(Op)))
         return nullptr;
 
       if (FirstInst->getOperand(Op)->getType() !=
diff --git a/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll b/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
index 1716321c0b4e7..b570cd1d99d2a 100644
--- a/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
+++ b/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
@@ -1,35 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s
 
 %vec = type { %vec_base }
 %vec_base = type { [4 x float] }
 %foo = type { %vec, %vec}
 
-define void @test(i1 %tobool, ptr addrspace(1) %add.ptr.i) {
+define void @test(i1 %tobool, ptr addrspace(1) %add.ptr) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i1 [[TOBOOL:%.*]], ptr addrspace(1) [[ADD_PTR:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[LANE_0:%.*]] = alloca [[FOO:%.*]], align 16
+; CHECK-NEXT:    [[LANE_15:%.*]] = insertelement <16 x ptr> poison, ptr [[LANE_0]], i64 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label %[[F1:.*]], label %[[F0:.*]]
+; CHECK:       [[F0]]:
+; CHECK-NEXT:    [[MM_VECTORGEP:%.*]] = getelementptr inbounds [[FOO]], <16 x ptr> [[LANE_15]], <16 x i64> zeroinitializer, <16 x i32> splat (i32 1), <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
+; CHECK-NEXT:    br label %[[MERGE:.*]]
+; CHECK:       [[F1]]:
+; CHECK-NEXT:    [[MM_VECTORGEP2:%.*]] = getelementptr inbounds [[FOO]], <16 x ptr> [[LANE_15]], <16 x i64> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
+; CHECK-NEXT:    br label %[[MERGE]]
+; CHECK:       [[MERGE]]:
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <16 x ptr> [ [[MM_VECTORGEP]], %[[F0]] ], [ [[MM_VECTORGEP2]], %[[F1]] ]
+; CHECK-NEXT:    store <16 x ptr> [[VEC_PHI]], ptr addrspace(1) [[ADD_PTR]], align 128
+; CHECK-NEXT:    ret void
+;
 entry:
   %lane.0 = alloca %foo, align 16
-  %lane.15 = insertelement <16 x ptr> undef, ptr %lane.0, i64 0
+  %lane.15 = insertelement <16 x ptr> poison, ptr %lane.0, i64 0
   %mm_vectorGEP = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i64> zeroinitializer, <16 x i32> splat (i32 1), <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
   %mm_vectorGEP2 = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i64> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
   br i1 %tobool, label %f1, label %f0
 
 f0:
-; CHECK: f0:
-; CHECK-NEXT: %mm_vectorGEP = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i64> zeroinitializer, <16 x i32> splat (i32 1), <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
   br label %merge
 
 f1:
-; CHECK: f1:
-; CHECK-NEXT: %mm_vectorGEP2 = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i64> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
   br label %merge
 
 merge:
-; CHECK: merge:
-; CHECK-NEXT: %vec.phi14 = phi <16 x ptr> [ %mm_vectorGEP, %f0 ], [ %mm_vectorGEP2, %f1 ]
-  %vec.phi14 = phi <16 x ptr> [ %mm_vectorGEP, %f0], [ %mm_vectorGEP2, %f1 ]
-  %wide.masked.gather15 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %vec.phi14, i32 4, <16 x i1> splat (i1 true), <16 x float> poison)
-  %wide.masked.gather15.extract.15. = extractelement <16 x float> %wide.masked.gather15, i32 15
-  store float %wide.masked.gather15.extract.15., ptr addrspace(1) %add.ptr.i, align 4
+  %vec.phi = phi <16 x ptr> [ %mm_vectorGEP, %f0], [ %mm_vectorGEP2, %f1 ]
+  store <16 x ptr> %vec.phi, ptr addrspace(1) %add.ptr
   ret void
 }
-
-declare <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr>, i32 immarg, <16 x i1>, <16 x float>)

>From abb656492897f5fba2b92c654f98196689895236 Mon Sep 17 00:00:00 2001
From: "Yang, Haonan" <haonan.yang at intel.com>
Date: Wed, 7 May 2025 11:09:17 +0200
Subject: [PATCH 3/3] Update lit test.

---
 .../fold-phi-arg-gep-to-phi-negative.ll       | 25 ++++++++-----------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll b/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
index b570cd1d99d2a..4ff5299af9cbe 100644
--- a/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
+++ b/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
@@ -1,33 +1,29 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s
 
-%vec = type { %vec_base }
-%vec_base = type { [4 x float] }
-%foo = type { %vec, %vec}
+%foo = type { i16, i16 }
 
-define void @test(i1 %tobool, ptr addrspace(1) %add.ptr) {
-; CHECK-LABEL: define void @test(
-; CHECK-SAME: i1 [[TOBOOL:%.*]], ptr addrspace(1) [[ADD_PTR:%.*]]) {
+define <16 x ptr> @test(i1 %tobool) {
+; CHECK-LABEL: define <16 x ptr> @test(
+; CHECK-SAME: i1 [[TOBOOL:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[LANE_0:%.*]] = alloca [[FOO:%.*]], align 16
 ; CHECK-NEXT:    [[LANE_15:%.*]] = insertelement <16 x ptr> poison, ptr [[LANE_0]], i64 0
 ; CHECK-NEXT:    br i1 [[TOBOOL]], label %[[F1:.*]], label %[[F0:.*]]
 ; CHECK:       [[F0]]:
-; CHECK-NEXT:    [[MM_VECTORGEP:%.*]] = getelementptr inbounds [[FOO]], <16 x ptr> [[LANE_15]], <16 x i64> zeroinitializer, <16 x i32> splat (i32 1), <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
+; CHECK-NEXT:    [[MM_VECTORGEP:%.*]] = getelementptr inbounds [[FOO]], <16 x ptr> [[LANE_15]], <16 x i64> splat (i64 1)
 ; CHECK-NEXT:    br label %[[MERGE:.*]]
 ; CHECK:       [[F1]]:
-; CHECK-NEXT:    [[MM_VECTORGEP2:%.*]] = getelementptr inbounds [[FOO]], <16 x ptr> [[LANE_15]], <16 x i64> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
 ; CHECK-NEXT:    br label %[[MERGE]]
 ; CHECK:       [[MERGE]]:
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <16 x ptr> [ [[MM_VECTORGEP]], %[[F0]] ], [ [[MM_VECTORGEP2]], %[[F1]] ]
-; CHECK-NEXT:    store <16 x ptr> [[VEC_PHI]], ptr addrspace(1) [[ADD_PTR]], align 128
-; CHECK-NEXT:    ret void
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <16 x ptr> [ [[MM_VECTORGEP]], %[[F0]] ], [ [[LANE_15]], %[[F1]] ]
+; CHECK-NEXT:    ret <16 x ptr> [[VEC_PHI]]
 ;
 entry:
   %lane.0 = alloca %foo, align 16
   %lane.15 = insertelement <16 x ptr> poison, ptr %lane.0, i64 0
-  %mm_vectorGEP = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i64> zeroinitializer, <16 x i32> splat (i32 1), <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
-  %mm_vectorGEP2 = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i64> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i64> splat (i64 1)
+  %mm_vectorGEP = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i32> splat (i32 1)
+  %mm_vectorGEP2 = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i32> zeroinitializer
   br i1 %tobool, label %f1, label %f0
 
 f0:
@@ -38,6 +34,5 @@ f1:
 
 merge:
   %vec.phi = phi <16 x ptr> [ %mm_vectorGEP, %f0], [ %mm_vectorGEP2, %f1 ]
-  store <16 x ptr> %vec.phi, ptr addrspace(1) %add.ptr
-  ret void
+  ret <16 x ptr> %vec.phi
 }



More information about the llvm-commits mailing list