[llvm] 7653586 - [AArch64][GlobalISel] Implement another combine for shufflevector->AArch64 G_EXT.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 29 14:53:35 PDT 2022
Author: Amara Emerson
Date: 2022-09-29T22:53:24+01:00
New Revision: 7653586d88d8fb45008dd1f89c197592e1ce7c31
URL: https://github.com/llvm/llvm-project/commit/7653586d88d8fb45008dd1f89c197592e1ce7c31
DIFF: https://github.com/llvm/llvm-project/commit/7653586d88d8fb45008dd1f89c197592e1ce7c31.diff
LOG: [AArch64][GlobalISel] Implement another combine for shufflevector->AArch64 G_EXT.
This is a port of an existing optimization from AArch64 ISelLowering, handling
the case where the same input vector can be used for both EXT inputs.
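For example, given a shuffle whose second source is undef and whose mask
indices step through a single source vector with wrap-around, the shuffle can
be lowered to a G_EXT that uses that source for both inputs. A minimal sketch,
taken from the v2s64_singleton_ext test added below (%c is a stand-in name for
the constant vreg; the immediate is the first mask index scaled by the element
size in bytes, 1 * 8 = 8):

  %v1:_(<2 x s64>) = COPY $q0
  %v2:_(<2 x s64>) = G_IMPLICIT_DEF
  %shuf:_(<2 x s64>) = G_SHUFFLE_VECTOR %v1(<2 x s64>), %v2, shufflemask(1, 0)

lowers to

  %c:_(s32) = G_CONSTANT i32 8
  %shuf:_(<2 x s64>) = G_EXT %v1, %v1, %c(s32)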
Differential Revision: https://reviews.llvm.org/D134891
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir
llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-uzp.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index eab1de94e9c8f..926b8fc7c3700 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
@@ -370,22 +371,60 @@ static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
return false;
}
+// Check if an EXT instruction can handle the shuffle mask when the vector
+// sources of the shuffle are the same.
+static bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
+ unsigned NumElts = Ty.getNumElements();
+
+ // Assume that the first shuffle index is not UNDEF. Fail if it is.
+ if (M[0] < 0)
+ return false;
+
+ // If this is a VEXT shuffle, the immediate value is the index of the first
+ // element. The other shuffle indices must be the successive elements after
+ // the first one.
+ unsigned ExpectedElt = M[0];
+ for (unsigned I = 1; I < NumElts; ++I) {
+ // Increment the expected index. If it wraps around, just follow it
+ // back to index zero and keep going.
+ ++ExpectedElt;
+ if (ExpectedElt == NumElts)
+ ExpectedElt = 0;
+
+ if (M[I] < 0)
+ continue; // Ignore UNDEF indices.
+ if (ExpectedElt != static_cast<unsigned>(M[I]))
+ return false;
+ }
+
+ return true;
+}
+
static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
ShuffleVectorPseudo &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
Register Dst = MI.getOperand(0).getReg();
- auto ExtInfo = getExtMask(MI.getOperand(3).getShuffleMask(),
- MRI.getType(Dst).getNumElements());
- if (!ExtInfo)
- return false;
- bool ReverseExt;
- uint64_t Imm;
- std::tie(ReverseExt, Imm) = *ExtInfo;
+ LLT DstTy = MRI.getType(Dst);
Register V1 = MI.getOperand(1).getReg();
Register V2 = MI.getOperand(2).getReg();
+ auto Mask = MI.getOperand(3).getShuffleMask();
+ uint64_t Imm;
+ auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
+ uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
+
+ if (!ExtInfo) {
+ if (!getOpcodeDef<GImplicitDef>(V2, MRI) ||
+ !isSingletonExtMask(Mask, DstTy))
+ return false;
+
+ Imm = Mask[0] * ExtFactor;
+ MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
+ return true;
+ }
+ bool ReverseExt;
+ std::tie(ReverseExt, Imm) = *ExtInfo;
if (ReverseExt)
std::swap(V1, V2);
- uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
Imm *= ExtFactor;
MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
return true;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir
index 2dbe4e518cd7e..a6c37f631ca23 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir
@@ -267,3 +267,69 @@ body: |
$q0 = COPY %shuf(<8 x s16>)
RET_ReallyLR implicit $q0
...
+---
+name: v2s64_singleton_ext
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0
+ ; CHECK-LABEL: name: v2s64_singleton_ext
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %v1:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; CHECK-NEXT: %shuf:_(<2 x s64>) = G_EXT %v1, %v1, [[C]](s32)
+ ; CHECK-NEXT: $q0 = COPY %shuf(<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %v1:_(<2 x s64>) = COPY $q0
+ %v2:_(<2 x s64>) = G_IMPLICIT_DEF
+ %shuf:_(<2 x s64>) = G_SHUFFLE_VECTOR %v1(<2 x s64>), %v2, shufflemask(1, 0)
+ $q0 = COPY %shuf(<2 x s64>)
+ RET_ReallyLR implicit $q0
+...
+---
+name: v2s64_singleton_ext_all_undef
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0
+ ; CHECK-LABEL: name: v2s64_singleton_ext_all_undef
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %v1:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: %v2:_(<2 x s64>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: %shuf:_(<2 x s64>) = G_ZIP2 %v1, %v2
+ ; CHECK-NEXT: $q0 = COPY %shuf(<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %v1:_(<2 x s64>) = COPY $q0
+ %v2:_(<2 x s64>) = G_IMPLICIT_DEF
+ %shuf:_(<2 x s64>) = G_SHUFFLE_VECTOR %v1(<2 x s64>), %v2, shufflemask(undef, undef)
+ $q0 = COPY %shuf(<2 x s64>)
+ RET_ReallyLR implicit $q0
+...
+---
+name: v2s64_singleton_ext_same
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0
+ ; CHECK-LABEL: name: v2s64_singleton_ext_same
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %v1:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: %shuf:_(<2 x s64>) = G_DUPLANE64 %v1, [[C]](s64)
+ ; CHECK-NEXT: $q0 = COPY %shuf(<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %v1:_(<2 x s64>) = COPY $q0
+ %v2:_(<2 x s64>) = G_IMPLICIT_DEF
+ %shuf:_(<2 x s64>) = G_SHUFFLE_VECTOR %v1(<2 x s64>), %v2, shufflemask(1, 1)
+ $q0 = COPY %shuf(<2 x s64>)
+ RET_ReallyLR implicit $q0
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-uzp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-uzp.mir
index 41db8ae30dd80..d1d5c6c29ba0d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-uzp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-uzp.mir
@@ -16,11 +16,12 @@ body: |
; CHECK-LABEL: name: uzp1_v4s32
; CHECK: liveins: $q0, $q1
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK: [[UZP1_:%[0-9]+]]:_(<4 x s32>) = G_UZP1 [[COPY]], [[COPY1]]
- ; CHECK: $q0 = COPY [[UZP1_]](<4 x s32>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[UZP1_:%[0-9]+]]:_(<4 x s32>) = G_UZP1 [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $q0 = COPY [[UZP1_]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 2, 4, 6)
@@ -38,15 +39,16 @@ body: |
; CHECK-LABEL: name: uzp2_v4s32
; CHECK: liveins: $q0, $q1
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK: [[UZP2_:%[0-9]+]]:_(<4 x s32>) = G_UZP2 [[COPY]], [[UZP2_]]
- ; CHECK: $q0 = COPY [[UZP2_]](<4 x s32>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[UZP2_:%[0-9]+]]:_(<4 x s32>) = G_UZP2 [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $q0 = COPY [[UZP2_]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
- %1:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 3, 5, 7)
- $q0 = COPY %1(<4 x s32>)
+ %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 3, 5, 7)
+ $q0 = COPY %2(<4 x s32>)
RET_ReallyLR implicit $q0
...
@@ -62,11 +64,12 @@ body: |
; CHECK-LABEL: name: no_uzp1
; CHECK: liveins: $q0, $q1
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(0, 1, 4, 6)
- ; CHECK: $q0 = COPY [[SHUF]](<4 x s32>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(0, 1, 4, 6)
+ ; CHECK-NEXT: $q0 = COPY [[SHUF]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 1, 4, 6)
@@ -86,11 +89,12 @@ body: |
; CHECK-LABEL: name: no_uzp2
; CHECK: liveins: $q0, $q1
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(1, 4, 5, 7)
- ; CHECK: $q0 = COPY [[SHUF]](<4 x s32>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(1, 4, 5, 7)
+ ; CHECK-NEXT: $q0 = COPY [[SHUF]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 4, 5, 7)
@@ -110,11 +114,12 @@ body: |
; CHECK-LABEL: name: uzp1_undef
; CHECK: liveins: $q0, $q1
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK: [[UZP1_:%[0-9]+]]:_(<4 x s32>) = G_UZP1 [[COPY]], [[COPY1]]
- ; CHECK: $q0 = COPY [[UZP1_]](<4 x s32>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[UZP1_:%[0-9]+]]:_(<4 x s32>) = G_UZP1 [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $q0 = COPY [[UZP1_]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, -1, 4, 6)
@@ -134,13 +139,14 @@ body: |
; CHECK-LABEL: name: uzp2_undef
; CHECK: liveins: $q0, $q1
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK: [[UZP2_:%[0-9]+]]:_(<4 x s32>) = G_UZP2 [[COPY]], [[UZP2_]]
- ; CHECK: $q0 = COPY [[UZP2_]](<4 x s32>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[UZP2_:%[0-9]+]]:_(<4 x s32>) = G_UZP2 [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $q0 = COPY [[UZP2_]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
- %1:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 3, -1, 7)
- $q0 = COPY %1(<4 x s32>)
+ %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 3, -1, 7)
+ $q0 = COPY %2(<4 x s32>)
RET_ReallyLR implicit $q0