[llvm] [VectorCombine] Add support for zext/sext/trunc to shuffleToIdentity (PR #92696)

David Green via llvm-commits llvm-commits at lists.llvm.org
Mon May 20 06:57:23 PDT 2024


https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/92696

>From d44901edc58cbfd04899b1f476afb953af65195d Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Sun, 19 May 2024 13:51:04 +0100
Subject: [PATCH 1/2] [VectorCombine] Add support for zext/sext/trunc to
 shuffleToIdentity

This is one of the simple additions to shuffleToIdentity that help it look
through intermediate zext/sext instructions. The other change here is the
removal of a check that both operands of the original shuffle are instructions,
which is a relic from a previous version.
---
 .../Transforms/Vectorize/VectorCombine.cpp    | 10 ++-
 .../AArch64/shuffletoidentity.ll              | 63 +++++--------------
 2 files changed, 23 insertions(+), 50 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 15deaf908422d..751780b8794b8 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1673,8 +1673,7 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
 // do so.
 bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
   auto *Ty = dyn_cast<FixedVectorType>(I.getType());
-  if (!Ty || !isa<Instruction>(I.getOperand(0)) ||
-      !isa<Instruction>(I.getOperand(1)))
+  if (!Ty)
     return false;
 
   using InstLane = std::pair<Value *, int>;
@@ -1773,7 +1772,9 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
         !cast<BinaryOperator>(Item[0].first)->isIntDivRem()) {
       Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
       Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 1));
-    } else if (isa<UnaryOperator>(Item[0].first)) {
+    } else if (isa<UnaryOperator>(Item[0].first) ||
+               isa<TruncInst>(Item[0].first) || isa<ZExtInst>(Item[0].first) ||
+               isa<SExtInst>(Item[0].first)) {
       Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
     } else if (auto *II = dyn_cast<IntrinsicInst>(Item[0].first);
                II && isTriviallyVectorizable(II->getIntrinsicID())) {
@@ -1834,6 +1835,9 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
     if (auto BI = dyn_cast<BinaryOperator>(I))
       return Builder.CreateBinOp((Instruction::BinaryOps)BI->getOpcode(),
                                  Ops[0], Ops[1]);
+    if (auto CI = dyn_cast<CastInst>(I))
+      return Builder.CreateCast((Instruction::CastOps)CI->getOpcode(), Ops[0],
+                                DstTy);
     if (II)
       return Builder.CreateIntrinsic(DstTy, II->getIntrinsicID(), Ops);
     assert(isa<UnaryInstruction>(I) &&
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
index bb333941abf70..441d797597da6 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
@@ -432,19 +432,10 @@ define <8 x half> @fma(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 
 define void @exttrunc(<8 x i32> %a, <8 x i32> %b, ptr %p) {
 ; CHECK-LABEL: @exttrunc(
-; CHECK-NEXT:    [[AB:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT:    [[AT:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:    [[BB:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT:    [[BT:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:    [[AB1:%.*]] = zext <4 x i32> [[AB]] to <4 x i64>
-; CHECK-NEXT:    [[AT1:%.*]] = zext <4 x i32> [[AT]] to <4 x i64>
-; CHECK-NEXT:    [[BB1:%.*]] = sext <4 x i32> [[BB]] to <4 x i64>
-; CHECK-NEXT:    [[BT1:%.*]] = sext <4 x i32> [[BT]] to <4 x i64>
-; CHECK-NEXT:    [[ABB:%.*]] = add <4 x i64> [[AB1]], [[BB1]]
-; CHECK-NEXT:    [[ABT:%.*]] = add <4 x i64> [[AT1]], [[BT1]]
-; CHECK-NEXT:    [[ABB1:%.*]] = trunc <4 x i64> [[ABB]] to <4 x i32>
-; CHECK-NEXT:    [[ABT1:%.*]] = trunc <4 x i64> [[ABT]] to <4 x i32>
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[ABB1]], <4 x i32> [[ABT1]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <8 x i32> [[A:%.*]] to <8 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i32> [[B:%.*]] to <8 x i64>
+; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[R:%.*]] = trunc <8 x i64> [[TMP3]] to <8 x i32>
 ; CHECK-NEXT:    store <8 x i32> [[R]], ptr [[P:%.*]], align 32
 ; CHECK-NEXT:    ret void
 ;
@@ -467,17 +458,9 @@ define void @exttrunc(<8 x i32> %a, <8 x i32> %b, ptr %p) {
 
 define void @zext(<8 x i16> %a, <8 x i16> %b, ptr %p) {
 ; CHECK-LABEL: @zext(
-; CHECK-NEXT:    [[AB:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT:    [[AT:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:    [[BB:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT:    [[BT:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:    [[AB1:%.*]] = zext <4 x i16> [[AB]] to <4 x i32>
-; CHECK-NEXT:    [[AT1:%.*]] = zext <4 x i16> [[AT]] to <4 x i32>
-; CHECK-NEXT:    [[BB1:%.*]] = zext <4 x i16> [[BB]] to <4 x i32>
-; CHECK-NEXT:    [[BT1:%.*]] = zext <4 x i16> [[BT]] to <4 x i32>
-; CHECK-NEXT:    [[ABB:%.*]] = add <4 x i32> [[AB1]], [[BB1]]
-; CHECK-NEXT:    [[ABT:%.*]] = add <4 x i32> [[AT1]], [[BT1]]
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[ABB]], <4 x i32> [[ABT]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <8 x i16> [[A:%.*]] to <8 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <8 x i16> [[B:%.*]] to <8 x i32>
+; CHECK-NEXT:    [[R:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    store <8 x i32> [[R]], ptr [[P:%.*]], align 32
 ; CHECK-NEXT:    ret void
 ;
@@ -498,17 +481,9 @@ define void @zext(<8 x i16> %a, <8 x i16> %b, ptr %p) {
 
 define void @sext(<8 x i16> %a, <8 x i16> %b, ptr %p) {
 ; CHECK-LABEL: @sext(
-; CHECK-NEXT:    [[AB:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT:    [[AT:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:    [[BB:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT:    [[BT:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:    [[AB1:%.*]] = sext <4 x i16> [[AB]] to <4 x i32>
-; CHECK-NEXT:    [[AT1:%.*]] = sext <4 x i16> [[AT]] to <4 x i32>
-; CHECK-NEXT:    [[BB1:%.*]] = sext <4 x i16> [[BB]] to <4 x i32>
-; CHECK-NEXT:    [[BT1:%.*]] = sext <4 x i16> [[BT]] to <4 x i32>
-; CHECK-NEXT:    [[ABB:%.*]] = add <4 x i32> [[AB1]], [[BB1]]
-; CHECK-NEXT:    [[ABT:%.*]] = add <4 x i32> [[AT1]], [[BT1]]
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[ABB]], <4 x i32> [[ABT]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT:    [[TMP1:%.*]] = sext <8 x i16> [[A:%.*]] to <8 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i16> [[B:%.*]] to <8 x i32>
+; CHECK-NEXT:    [[R:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    store <8 x i32> [[R]], ptr [[P:%.*]], align 32
 ; CHECK-NEXT:    ret void
 ;
@@ -567,11 +542,7 @@ define void @zext_types(<8 x i16> %a, <8 x i32> %b, ptr %p) {
 
 define void @trunc(<8 x i64> %a, <8 x i64> %b, ptr %p) {
 ; CHECK-LABEL: @trunc(
-; CHECK-NEXT:    [[AB:%.*]] = shufflevector <8 x i64> [[A:%.*]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT:    [[AT:%.*]] = shufflevector <8 x i64> [[A]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:    [[ABB1:%.*]] = trunc <4 x i64> [[AB]] to <4 x i32>
-; CHECK-NEXT:    [[ABT1:%.*]] = trunc <4 x i64> [[AT]] to <4 x i32>
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[ABB1]], <4 x i32> [[ABT1]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT:    [[R:%.*]] = trunc <8 x i64> [[A:%.*]] to <8 x i32>
 ; CHECK-NEXT:    store <8 x i32> [[R]], ptr [[P:%.*]], align 32
 ; CHECK-NEXT:    ret void
 ;
@@ -745,13 +716,11 @@ entry:
 
 define <4 x i8> @singleop(<4 x i8> %a, <4 x i8> %b) {
 ; CHECK-LABEL: @singleop(
-; CHECK-NEXT:    [[A1:%.*]] = shufflevector <4 x i8> [[A:%.*]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[B1:%.*]] = shufflevector <4 x i8> [[B:%.*]], <4 x i8> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[A2:%.*]] = zext <4 x i8> [[A1]] to <4 x i16>
-; CHECK-NEXT:    [[B2:%.*]] = zext <4 x i8> [[B1]] to <4 x i16>
-; CHECK-NEXT:    [[AB:%.*]] = add <4 x i16> [[A2]], [[B2]]
-; CHECK-NEXT:    [[T:%.*]] = trunc <4 x i16> [[AB]] to <4 x i8>
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i8> [[T]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i8> [[B:%.*]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i8> [[A:%.*]] to <4 x i16>
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i16>
+; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[R:%.*]] = trunc <4 x i16> [[TMP4]] to <4 x i8>
 ; CHECK-NEXT:    ret <4 x i8> [[R]]
 ;
   %a1 = shufflevector <4 x i8> %a, <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>

>From d2ee97528c6475561d139a0035d3624eb93f49c7 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 20 May 2024 14:57:08 +0100
Subject: [PATCH 2/2] Use mulit-element isa

---
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 751780b8794b8..bc4ba8f1e54c4 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1772,9 +1772,8 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
         !cast<BinaryOperator>(Item[0].first)->isIntDivRem()) {
       Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
       Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 1));
-    } else if (isa<UnaryOperator>(Item[0].first) ||
-               isa<TruncInst>(Item[0].first) || isa<ZExtInst>(Item[0].first) ||
-               isa<SExtInst>(Item[0].first)) {
+    } else if (isa<UnaryOperator, TruncInst, SExtInst, ZExtInst>(
+                   Item[0].first)) {
       Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
     } else if (auto *II = dyn_cast<IntrinsicInst>(Item[0].first);
                II && isTriviallyVectorizable(II->getIntrinsicID())) {



More information about the llvm-commits mailing list