[llvm] [InstCombine] Combine extractelement from a vector_extract at index 0 (PR #151491)

Thu Jul 31 06:07:31 PDT 2025

https://github.com/kmclaughlin-arm updated https://github.com/llvm/llvm-project/pull/151491

>From 03413139be5f006859c933203922c7f63bfd2a60 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Thu, 17 Jul 2025 08:49:24 +0000
Subject: [PATCH 1/3] - Add a test for extractelement from scalable
 vector_extract at index 0

---
 .../InstCombine/scalable-extract-subvec-elt.ll     | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/scalable-extract-subvec-elt.ll

diff --git a/llvm/test/Transforms/InstCombine/scalable-extract-subvec-elt.ll b/llvm/test/Transforms/InstCombine/scalable-extract-subvec-elt.ll
new file mode 100644
index 0000000000000..d0cfc48420b43
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/scalable-extract-subvec-elt.ll
@@ -0,0 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+define i1 @scalable_test(<vscale x 4 x i1> %a) {
+; CHECK-LABEL: define i1 @scalable_test(
+; CHECK-SAME: <vscale x 4 x i1> [[A:%.*]]) {
+; CHECK-NEXT:    [[SUBVEC:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv4i1(<vscale x 4 x i1> [[A]], i64 0)
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <vscale x 2 x i1> [[SUBVEC]], i64 1
+; CHECK-NEXT:    ret i1 [[ELT]]
+;
+  %subvec = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv4i1.i64(<vscale x 4 x i1> %a, i64 0)
+  %elt = extractelement <vscale x 2 x i1> %subvec, i32 1
+  ret i1 %elt
+}

>From d93bdead6c6d36907c3ed539407736070792f92b Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Thu, 17 Jul 2025 08:51:47 +0000
Subject: [PATCH 2/3] [InstCombine] Combine extractelement from a
 vector_extract at index 0

Extracting an element from a subvector that starts at index 0 of a
wider vector is equivalent to extracting the same element directly from
the wider vector, i.e.

  extract_elt(vector_extract(x, 0), y) -> extract_elt(x, y)
---
 llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp    | 6 +++++-
 .../Transforms/InstCombine/scalable-extract-subvec-elt.ll   | 3 +--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 00b877b8a07ef..6f2adba9e3f6b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -444,7 +444,11 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
         else
           Idx = PoisonValue::get(Ty);
         return replaceInstUsesWith(EI, Idx);
-      }
+      } else if (IID == Intrinsic::vector_extract)
+        // If II is a subvector starting at index 0, extract from the wider
+        // source vector
+        if (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 0)
+          return ExtractElementInst::Create(II->getArgOperand(0), Index);
     }
 
     // InstSimplify should handle cases where the index is invalid.
diff --git a/llvm/test/Transforms/InstCombine/scalable-extract-subvec-elt.ll b/llvm/test/Transforms/InstCombine/scalable-extract-subvec-elt.ll
index d0cfc48420b43..38cbeb0df98bd 100644
--- a/llvm/test/Transforms/InstCombine/scalable-extract-subvec-elt.ll
+++ b/llvm/test/Transforms/InstCombine/scalable-extract-subvec-elt.ll
@@ -4,8 +4,7 @@
 define i1 @scalable_test(<vscale x 4 x i1> %a) {
 ; CHECK-LABEL: define i1 @scalable_test(
 ; CHECK-SAME: <vscale x 4 x i1> [[A:%.*]]) {
-; CHECK-NEXT:    [[SUBVEC:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv4i1(<vscale x 4 x i1> [[A]], i64 0)
-; CHECK-NEXT:    [[ELT:%.*]] = extractelement <vscale x 2 x i1> [[SUBVEC]], i64 1
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <vscale x 4 x i1> [[A]], i64 1
 ; CHECK-NEXT:    ret i1 [[ELT]]
 ;
   %subvec = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv4i1.i64(<vscale x 4 x i1> %a, i64 0)

>From d9b7aa9557075c166a34ac7dc9ba4d77c2ac36d4 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Thu, 31 Jul 2025 12:26:50 +0000
Subject: [PATCH 3/3] - Support variable indices and add variable index &
 negative tests

---
 .../InstCombine/InstCombineVectorOps.cpp      | 15 ++++++-----
 .../scalable-extract-subvec-elt.ll            | 27 +++++++++++++++++--
 2 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 6f2adba9e3f6b..cb188ea9a68ec 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -419,6 +419,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
   // If extracting a specified index from the vector, see if we can recursively
   // find a previously computed scalar that was inserted into the vector.
   auto *IndexC = dyn_cast<ConstantInt>(Index);
+  auto *II = dyn_cast<IntrinsicInst>(SrcVec);
   bool HasKnownValidIndex = false;
   if (IndexC) {
     // Canonicalize type of constant indices to i64 to simplify CSE
@@ -429,7 +430,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
     unsigned NumElts = EC.getKnownMinValue();
     HasKnownValidIndex = IndexC->getValue().ult(NumElts);
 
-    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) {
+    if (II) {
       Intrinsic::ID IID = II->getIntrinsicID();
       // Index needs to be lower than the minimum size of the vector, because
       // for scalable vector, the vector size is known at run time.
@@ -444,11 +445,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
         else
           Idx = PoisonValue::get(Ty);
         return replaceInstUsesWith(EI, Idx);
-      } else if (IID == Intrinsic::vector_extract)
-        // If II is a subvector starting at index 0, extract from the wider
-        // source vector
-        if (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 0)
-          return ExtractElementInst::Create(II->getArgOperand(0), Index);
+      }
     }
 
     // InstSimplify should handle cases where the index is invalid.
@@ -466,6 +463,12 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
         return ScalarPHI;
   }
 
+  // If SrcVec is an llvm.vector.extract starting at index 0, extract the
+  // element directly from the wider source vector.
+  if (II && II->getIntrinsicID() == Intrinsic::vector_extract)
+    if (cast<ConstantInt>(II->getArgOperand(1))->isZero())
+      return ExtractElementInst::Create(II->getArgOperand(0), Index);
+
   // TODO come up with a n-ary matcher that subsumes both unary and
   // binary matchers.
   UnaryOperator *UO;
diff --git a/llvm/test/Transforms/InstCombine/scalable-extract-subvec-elt.ll b/llvm/test/Transforms/InstCombine/scalable-extract-subvec-elt.ll
index 38cbeb0df98bd..1e089e1168f66 100644
--- a/llvm/test/Transforms/InstCombine/scalable-extract-subvec-elt.ll
+++ b/llvm/test/Transforms/InstCombine/scalable-extract-subvec-elt.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -S -passes=instcombine < %s | FileCheck %s
 
-define i1 @scalable_test(<vscale x 4 x i1> %a) {
-; CHECK-LABEL: define i1 @scalable_test(
+define i1 @extract_const_idx(<vscale x 4 x i1> %a) {
+; CHECK-LABEL: define i1 @extract_const_idx(
 ; CHECK-SAME: <vscale x 4 x i1> [[A:%.*]]) {
 ; CHECK-NEXT:    [[ELT:%.*]] = extractelement <vscale x 4 x i1> [[A]], i64 1
 ; CHECK-NEXT:    ret i1 [[ELT]]
@@ -11,3 +11,26 @@ define i1 @scalable_test(<vscale x 4 x i1> %a) {
   %elt = extractelement <vscale x 2 x i1> %subvec, i32 1
   ret i1 %elt
 }
+
+define float @extract_variable_idx(<vscale x 4 x float> %a, i32 %idx) {
+; CHECK-LABEL: define float @extract_variable_idx(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], i32 [[IDX:%.*]]) {
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <vscale x 4 x float> [[A]], i32 [[IDX]]
+; CHECK-NEXT:    ret float [[ELT]]
+;
+  %subvec = call <vscale x 2 x float> @llvm.vector.extract.nxv2f32.nxv4f32.i64(<vscale x 4 x float> %a, i64 0)
+  %elt = extractelement <vscale x 2 x float> %subvec, i32 %idx
+  ret float %elt
+}
+
+define float @negative_test(<vscale x 4 x float> %a) {
+; CHECK-LABEL: define float @negative_test(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]]) {
+; CHECK-NEXT:    [[SUBVEC:%.*]] = call <vscale x 2 x float> @llvm.vector.extract.nxv2f32.nxv4f32(<vscale x 4 x float> [[A]], i64 2)
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <vscale x 2 x float> [[SUBVEC]], i64 1
+; CHECK-NEXT:    ret float [[ELT]]
+;
+  %subvec = call <vscale x 2 x float> @llvm.vector.extract.nxv2f32.nxv4f32.i64(<vscale x 4 x float> %a, i64 2)
+  %elt = extractelement <vscale x 2 x float> %subvec, i32 1
+  ret float %elt
+}