[llvm] [VectorCombine] Add support for zext/sext/trunc to shuffleToIdentity (PR #92696)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu May 23 10:11:02 PDT 2024
https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/92696
>From 0ba97b391c89226973d0fa65ea29b6b68783efd3 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 23 May 2024 18:08:54 +0100
Subject: [PATCH] [VectorCombine] Add support for zext/sext/trunc to
shuffleToIdentity
This is one of the simple additions to shuffleToIdentity that help it look
through intermediate zext/sext instructions. The other change here is the
removal of a check that both operands of the original shuffle are instructions,
which is a relic from a previous version.
---
.../Transforms/Vectorize/VectorCombine.cpp | 8 +-
.../AArch64/shuffletoidentity.ll | 80 +++++--------------
2 files changed, 26 insertions(+), 62 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index b5a292841172b..41a8098ddae1a 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1742,6 +1742,9 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
if (auto *BI = dyn_cast<BinaryOperator>(I))
return Builder.CreateBinOp((Instruction::BinaryOps)BI->getOpcode(), Ops[0],
Ops[1]);
+ if (auto CI = dyn_cast<CastInst>(I))
+ return Builder.CreateCast((Instruction::CastOps)CI->getOpcode(), Ops[0],
+ DstTy);
if (II)
return Builder.CreateIntrinsic(DstTy, II->getIntrinsicID(), Ops);
assert(isa<UnaryInstruction>(I) && "Unexpected instruction type in Generate");
@@ -1753,8 +1756,7 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
// do so.
bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
auto *Ty = dyn_cast<FixedVectorType>(I.getType());
- if (!Ty || !isa<Instruction>(I.getOperand(0)) ||
- !isa<Instruction>(I.getOperand(1)))
+ if (!Ty)
return false;
SmallVector<InstLane> Start(Ty->getNumElements());
@@ -1825,7 +1827,7 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
!cast<BinaryOperator>(FrontV)->isIntDivRem()) {
Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
Worklist.push_back(generateInstLaneVectorFromOperand(Item, 1));
- } else if (isa<UnaryOperator>(FrontV)) {
+ } else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst>(FrontV)) {
Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
} else if (auto *II = dyn_cast<IntrinsicInst>(FrontV);
II && isTriviallyVectorizable(II->getIntrinsicID())) {
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
index 7dadeb5d72de0..69ae9c359bae1 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
@@ -15,9 +15,7 @@ define <8 x i8> @trivial(<8 x i8> %a) {
define <4 x i32> @add_same_operands(<4 x i32> %x) {
; CHECK-LABEL: @add_same_operands(
-; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[SHUF]], [[SHUF]]
-; CHECK-NEXT: [[REVSHUF:%.*]] = shufflevector <4 x i32> [[ADD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[REVSHUF:%.*]] = add <4 x i32> [[X:%.*]], [[X]]
; CHECK-NEXT: ret <4 x i32> [[REVSHUF]]
;
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -372,8 +370,7 @@ define <8 x i8> @inner_shuffle(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
define <4 x i32> @extrause_add_same_operands(<4 x i32> %x) {
; CHECK-LABEL: @extrause_add_same_operands(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[SHUF]], [[SHUF]]
-; CHECK-NEXT: [[REVSHUF:%.*]] = shufflevector <4 x i32> [[ADD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[REVSHUF:%.*]] = add <4 x i32> [[X]], [[X]]
; CHECK-NEXT: [[ADD2:%.*]] = add <4 x i32> [[SHUF]], [[REVSHUF]]
; CHECK-NEXT: ret <4 x i32> [[ADD2]]
;
@@ -543,9 +540,7 @@ define <8 x half> @fma(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
define <4 x i64> @single_zext(<4 x i32> %x) {
; CHECK-LABEL: @single_zext(
-; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[ZEXT:%.*]] = zext <4 x i32> [[SHUF]] to <4 x i64>
-; CHECK-NEXT: [[REVSHUF:%.*]] = shufflevector <4 x i64> [[ZEXT]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[REVSHUF:%.*]] = zext <4 x i32> [[X:%.*]] to <4 x i64>
; CHECK-NEXT: ret <4 x i64> [[REVSHUF]]
;
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -600,19 +595,10 @@ define <8 x i16> @not_bitcast2(<4 x i32> %x, <8 x i16> %y) {
define void @exttrunc(<8 x i32> %a, <8 x i32> %b, ptr %p) {
; CHECK-LABEL: @exttrunc(
-; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: [[AB1:%.*]] = zext <4 x i32> [[AB]] to <4 x i64>
-; CHECK-NEXT: [[AT1:%.*]] = zext <4 x i32> [[AT]] to <4 x i64>
-; CHECK-NEXT: [[BB1:%.*]] = sext <4 x i32> [[BB]] to <4 x i64>
-; CHECK-NEXT: [[BT1:%.*]] = sext <4 x i32> [[BT]] to <4 x i64>
-; CHECK-NEXT: [[ABB:%.*]] = add <4 x i64> [[AB1]], [[BB1]]
-; CHECK-NEXT: [[ABT:%.*]] = add <4 x i64> [[AT1]], [[BT1]]
-; CHECK-NEXT: [[ABB1:%.*]] = trunc <4 x i64> [[ABB]] to <4 x i32>
-; CHECK-NEXT: [[ABT1:%.*]] = trunc <4 x i64> [[ABT]] to <4 x i32>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[ABB1]], <4 x i32> [[ABT1]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i32> [[A:%.*]] to <8 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i32> [[B:%.*]] to <8 x i64>
+; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[R:%.*]] = trunc <8 x i64> [[TMP3]] to <8 x i32>
; CHECK-NEXT: store <8 x i32> [[R]], ptr [[P:%.*]], align 32
; CHECK-NEXT: ret void
;
@@ -635,17 +621,9 @@ define void @exttrunc(<8 x i32> %a, <8 x i32> %b, ptr %p) {
define void @zext(<8 x i16> %a, <8 x i16> %b, ptr %p) {
; CHECK-LABEL: @zext(
-; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: [[AB1:%.*]] = zext <4 x i16> [[AB]] to <4 x i32>
-; CHECK-NEXT: [[AT1:%.*]] = zext <4 x i16> [[AT]] to <4 x i32>
-; CHECK-NEXT: [[BB1:%.*]] = zext <4 x i16> [[BB]] to <4 x i32>
-; CHECK-NEXT: [[BT1:%.*]] = zext <4 x i16> [[BT]] to <4 x i32>
-; CHECK-NEXT: [[ABB:%.*]] = add <4 x i32> [[AB1]], [[BB1]]
-; CHECK-NEXT: [[ABT:%.*]] = add <4 x i32> [[AT1]], [[BT1]]
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[ABB]], <4 x i32> [[ABT]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i16> [[A:%.*]] to <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[B:%.*]] to <8 x i32>
+; CHECK-NEXT: [[R:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: store <8 x i32> [[R]], ptr [[P:%.*]], align 32
; CHECK-NEXT: ret void
;
@@ -666,17 +644,9 @@ define void @zext(<8 x i16> %a, <8 x i16> %b, ptr %p) {
define void @sext(<8 x i16> %a, <8 x i16> %b, ptr %p) {
; CHECK-LABEL: @sext(
-; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: [[AB1:%.*]] = sext <4 x i16> [[AB]] to <4 x i32>
-; CHECK-NEXT: [[AT1:%.*]] = sext <4 x i16> [[AT]] to <4 x i32>
-; CHECK-NEXT: [[BB1:%.*]] = sext <4 x i16> [[BB]] to <4 x i32>
-; CHECK-NEXT: [[BT1:%.*]] = sext <4 x i16> [[BT]] to <4 x i32>
-; CHECK-NEXT: [[ABB:%.*]] = add <4 x i32> [[AB1]], [[BB1]]
-; CHECK-NEXT: [[ABT:%.*]] = add <4 x i32> [[AT1]], [[BT1]]
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[ABB]], <4 x i32> [[ABT]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i16> [[A:%.*]] to <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i16> [[B:%.*]] to <8 x i32>
+; CHECK-NEXT: [[R:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: store <8 x i32> [[R]], ptr [[P:%.*]], align 32
; CHECK-NEXT: ret void
;
@@ -735,11 +705,7 @@ define void @zext_types(<8 x i16> %a, <8 x i32> %b, ptr %p) {
define void @trunc(<8 x i64> %a, <8 x i64> %b, ptr %p) {
; CHECK-LABEL: @trunc(
-; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i64> [[A:%.*]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i64> [[A]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: [[ABB1:%.*]] = trunc <4 x i64> [[AB]] to <4 x i32>
-; CHECK-NEXT: [[ABT1:%.*]] = trunc <4 x i64> [[AT]] to <4 x i32>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[ABB1]], <4 x i32> [[ABT1]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT: [[R:%.*]] = trunc <8 x i64> [[A:%.*]] to <8 x i32>
; CHECK-NEXT: store <8 x i32> [[R]], ptr [[P:%.*]], align 32
; CHECK-NEXT: ret void
;
@@ -754,10 +720,8 @@ define void @trunc(<8 x i64> %a, <8 x i64> %b, ptr %p) {
define <4 x i64> @zext_chain(<4 x i16> %x) {
; CHECK-LABEL: @zext_chain(
-; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i16> [[X:%.*]], <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[ZEXT:%.*]] = zext <4 x i16> [[SHUF]] to <4 x i32>
-; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i32> [[ZEXT]] to <4 x i64>
-; CHECK-NEXT: [[REVSHUF:%.*]] = shufflevector <4 x i64> [[SEXT]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i16> [[X:%.*]] to <4 x i32>
+; CHECK-NEXT: [[REVSHUF:%.*]] = sext <4 x i32> [[TMP1]] to <4 x i64>
; CHECK-NEXT: ret <4 x i64> [[REVSHUF]]
;
%shuf = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -958,13 +922,11 @@ entry:
define <4 x i8> @singleop(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: @singleop(
-; CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i8> [[A:%.*]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[B1:%.*]] = shufflevector <4 x i8> [[B:%.*]], <4 x i8> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[A2:%.*]] = zext <4 x i8> [[A1]] to <4 x i16>
-; CHECK-NEXT: [[B2:%.*]] = zext <4 x i8> [[B1]] to <4 x i16>
-; CHECK-NEXT: [[AB:%.*]] = add <4 x i16> [[A2]], [[B2]]
-; CHECK-NEXT: [[T:%.*]] = trunc <4 x i16> [[AB]] to <4 x i8>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[T]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[B:%.*]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[A:%.*]] to <4 x i16>
+; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i16>
+; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[R:%.*]] = trunc <4 x i16> [[TMP4]] to <4 x i8>
; CHECK-NEXT: ret <4 x i8> [[R]]
;
%a1 = shufflevector <4 x i8> %a, <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
More information about the llvm-commits
mailing list