[llvm] 5a47698 - [InstSimplify] Properly constrain {insert,extract}_subvector intrinsic fold

Fri May 21 03:05:40 PDT 2021

Author: Joe Ellis
Date: 2021-05-21T10:05:03Z
New Revision: 5a476987f7d653eb7ce2d8ba0ddf0125f481117e

URL: https://github.com/llvm/llvm-project/commit/5a476987f7d653eb7ce2d8ba0ddf0125f481117e
DIFF: https://github.com/llvm/llvm-project/commit/5a476987f7d653eb7ce2d8ba0ddf0125f481117e.diff

LOG: [InstSimplify] Properly constrain {insert,extract}_subvector intrinsic fold

The previous rule:

   (insert_vector _, (extract_vector X, 0), 0) -> X

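is not quite correct: the lanes of the result that lie outside the inserted
subvector come from the first operand of insert_vector, not from X, so the
result only equals X when those lanes also match X. The correct fold is: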

   (insert_vector Y, (extract_vector X, 0), 0) -> X
   where: Y is X, or Y is undef

This commit restricts the fold to those two cases and adds a negative test
in which Y is an unrelated vector.

Reviewed By: peterwaller-arm, paulwalker-arm

Differential Revision: https://reviews.llvm.org/D102699

Added: 
    

Modified: 
    llvm/lib/Analysis/InstructionSimplify.cpp
    llvm/test/Transforms/InstSimplify/insert-vector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 08fccc21cf3a2..0f5a5bb637357 100644

--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5734,16 +5734,19 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
     return nullptr;
   }
   case Intrinsic::experimental_vector_insert: {
+    Value *Vec = Call->getArgOperand(0);
     Value *SubVec = Call->getArgOperand(1);
     Value *Idx = Call->getArgOperand(2);
     Type *ReturnType = F->getReturnType();
 
-    // (insert_vector _, (extract_vector X, 0), 0) -> X
+    // (insert_vector Y, (extract_vector X, 0), 0) -> X
+    // where: Y is X, or Y is undef
     unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
     Value *X = nullptr;
     if (match(SubVec, m_Intrinsic<Intrinsic::experimental_vector_extract>(
                           m_Value(X), m_Zero())) &&
-        IdxN == 0 && X->getType() == ReturnType)
+        (Q.isUndefValue(Vec) || Vec == X) && IdxN == 0 &&
+        X->getType() == ReturnType)
       return X;
 
     return nullptr;

diff  --git a/llvm/test/Transforms/InstSimplify/insert-vector.ll b/llvm/test/Transforms/InstSimplify/insert-vector.ll
index 99becf3aad375..c4f65e5b095d1 100644
--- a/llvm/test/Transforms/InstSimplify/insert-vector.ll
+++ b/llvm/test/Transforms/InstSimplify/insert-vector.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instsimplify -S | FileCheck %s
 
-define <vscale x 16 x i8> @redundant_extract_insert_chain(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %x) {
+define <vscale x 16 x i8> @redundant_extract_insert_chain(<vscale x 16 x i8> %x) {
 ; CHECK-LABEL: @redundant_extract_insert_chain(
 ; CHECK-NEXT:    ret <vscale x 16 x i8> [[X:%.*]]
 ;
@@ -10,8 +10,8 @@ define <vscale x 16 x i8> @redundant_extract_insert_chain(<vscale x 16 x i1> %pg
   ret <vscale x 16 x i8> %inserted
 }
 
-define <vscale x 16 x i8> @non_redundant_extract_insert_chain(<vscale x 16 x i1> %pg, <vscale x 32 x i8> %x) {
-; CHECK-LABEL: @non_redundant_extract_insert_chain(
+define <vscale x 16 x i8> @non_redundant_extract_insert_chain_0(<vscale x 32 x i8> %x) {
+; CHECK-LABEL: @non_redundant_extract_insert_chain_0(
 ; CHECK-NEXT:    [[EXTRACTED:%.*]] = call <32 x i8> @llvm.experimental.vector.extract.v32i8.nxv32i8(<vscale x 32 x i8> [[X:%.*]], i64 0)
 ; CHECK-NEXT:    [[INSERTED:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v32i8(<vscale x 16 x i8> undef, <32 x i8> [[EXTRACTED]], i64 0)
 ; CHECK-NEXT:    ret <vscale x 16 x i8> [[INSERTED]]
@@ -21,6 +21,17 @@ define <vscale x 16 x i8> @non_redundant_extract_insert_chain(<vscale x 16 x i1>
   ret <vscale x 16 x i8> %inserted
 }
 
+define <vscale x 16 x i8> @non_redundant_extract_insert_chain_1(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
+; CHECK-LABEL: @non_redundant_extract_insert_chain_1(
+; CHECK-NEXT:    [[EXTRACTED:%.*]] = call <32 x i8> @llvm.experimental.vector.extract.v32i8.nxv16i8(<vscale x 16 x i8> [[X:%.*]], i64 0)
+; CHECK-NEXT:    [[INSERTED:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v32i8(<vscale x 16 x i8> [[Y:%.*]], <32 x i8> [[EXTRACTED]], i64 0)
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[INSERTED]]
+;
+  %extracted = call <32 x i8> @llvm.experimental.vector.extract.v32i8.nxv16i8(<vscale x 16 x i8> %x, i64 0)
+  %inserted = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v32i8(<vscale x 16 x i8> %y, <32 x i8> %extracted, i64 0)
+  ret <vscale x 16 x i8> %inserted
+}
+
 declare <32 x i8> @llvm.experimental.vector.extract.v32i8.nxv16i8(<vscale x 16 x i8>, i64)
 declare <32 x i8> @llvm.experimental.vector.extract.v32i8.nxv32i8(<vscale x 32 x i8>, i64)
 declare <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v32i8(<vscale x 16 x i8>, <32 x i8>, i64)