[llvm] [GlobalISel] Fold G_SHUFFLE_VECTOR with a single element mask to G_EXTRACT_VECTOR_ELT (PR #65342)
Vladislav Dzhidzhoev via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 6 09:16:05 PDT 2023
https://github.com/dzhidzhoev updated https://github.com/llvm/llvm-project/pull/65342:
>From 78adb9d02d020a77a420bd091395ed93ba09d44c Mon Sep 17 00:00:00 2001
From: Vladislav Dzhidzhoev <vdzhidzhoev at accesssoftek.com>
Date: Fri, 1 Sep 2023 18:37:36 +0200
Subject: [PATCH 1/4] [GlobalISel] Fold G_SHUFFLE_VECTOR with a single element
mask to G_EXTRACT_VECTOR_ELT
It introduces a minor regression in arm64-vcvt_f.ll, which will be fixed
later.
---
.../llvm/CodeGen/GlobalISel/CombinerHelper.h | 2 +
.../include/llvm/Target/GlobalISel/Combine.td | 7 +
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 38 +++
llvm/lib/Target/AArch64/AArch64Combine.td | 3 +-
.../prelegalizercombiner-shuffle-vector.mir | 232 ++++++++++--------
llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll | 9 +-
llvm/test/CodeGen/AArch64/ext-narrow-index.ll | 190 +++++++++++++-
7 files changed, 378 insertions(+), 103 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 75799e5eddcc67..9b464c7e1a19b9 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -255,6 +255,8 @@ class CombinerHelper {
/// Replace \p MI with a concat_vectors with \p Ops.
void applyCombineShuffleVector(MachineInstr &MI,
const ArrayRef<Register> Ops);
+ bool matchShuffleToExtract(MachineInstr &MI);
+ void applyShuffleToExtract(MachineInstr &MI);
/// Optimize memcpy intrinsics et al, e.g. constant len calls.
/// /p MaxLen if non-zero specifies the max length of a mem libcall to inline.
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 977d5cba532102..fd73a5995355f5 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -353,6 +353,13 @@ def propagate_undef_shuffle_mask: GICombineRule<
[{ return Helper.matchUndefShuffleVectorMask(*${root}); }]),
(apply [{ Helper.replaceInstWithUndef(*${root}); }])>;
+// Fold a G_SHUFFLE_VECTOR with a one-element mask to G_EXTRACT_VECTOR_ELT/COPY.
+def shuffle_to_extract: GICombineRule<
+ (defs root:$root),
+ (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+ [{ return Helper.matchShuffleToExtract(*${root}); }]),
+ (apply [{ Helper.applyShuffleToExtract(*${root}); }])>;
+
// Replace an insert/extract element of an out of bounds index with undef.
def insert_extract_vec_elt_out_of_bounds : GICombineRule<
(defs root:$root),
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 17f030c473f80a..e43e65b6e64750 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -395,6 +395,44 @@ void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
replaceRegWith(MRI, DstReg, NewDstReg);
}
+bool CombinerHelper::matchShuffleToExtract(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
+ "Invalid instruction kind");
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+ LLT Src1Ty = MRI.getType(MI.getOperand(1).getReg());
+ LLT Src2Ty = MRI.getType(MI.getOperand(2).getReg());
+ int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1; // a non-vector source counts as one element
+ int Src2NumElts = Src2Ty.isVector() ? Src2Ty.getNumElements() : 1;
+ // Match only a one-element mask whose index is non-negative and within both sources combined.
+ return Mask.size() == 1 && Mask[0] >= 0 &&
+ Mask[0] < Src1NumElts + Src2NumElts;
+}
+
+void CombinerHelper::applyShuffleToExtract(MachineInstr &MI) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.setInsertPt(*MI.getParent(), MI);
+ Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
+ // Choose the source operand addressed by the first (only matched) mask index I.
+ int I = MI.getOperand(3).getShuffleMask()[0];
+ Register Src1 = MI.getOperand(1).getReg();
+ LLT Src1Ty = MRI.getType(Src1);
+ int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
+ Register SrcReg;
+ if (I < Src1NumElts)
+ SrcReg = Src1;
+ else {
+ SrcReg = MI.getOperand(2).getReg();
+ I -= Src1NumElts; // rebase the index so it is relative to the second source
+ }
+ if (!MRI.getType(SrcReg).isVector())
+ Builder.buildCopy(NewDstReg, SrcReg); // non-vector source: copy it instead of extracting
+ else
+ Builder.buildExtractVectorElementConstant(NewDstReg, SrcReg, I);
+ // Erase the shuffle, then pass DstReg and NewDstReg to replaceRegWith.
+ MI.eraseFromParent();
+ replaceRegWith(MRI, DstReg, NewDstReg);
+}
+
namespace {
/// Select a preference between two uses. CurrentUse is the current preference
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 699310d0627dba..7e6d2805a8863c 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -37,7 +37,8 @@ def AArch64PreLegalizerCombiner: GICombiner<
"AArch64PreLegalizerCombinerImpl", [all_combines,
fconstant_to_constant,
icmp_redundant_trunc,
- fold_global_offset]> {
+ fold_global_offset,
+ shuffle_to_extract]> {
let CombineAllMethodName = "tryCombineAllImpl";
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir
index 077bce72c5e6e3..1873f58be6f886 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir
@@ -12,10 +12,11 @@ body: |
; CHECK-LABEL: name: shuffle_vector_to_concat_vector_0123
; CHECK: liveins: $d0, $d1
- ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
- ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>)
- ; CHECK: RET_ReallyLR implicit [[CONCAT_VECTORS]](<4 x s32>)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[CONCAT_VECTORS]](<4 x s32>)
%0:_(<2 x s32>) = COPY $d0
%1:_(<2 x s32>) = COPY $d1
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1(<2 x s32>), shufflemask(0, 1, 2, 3)
@@ -32,10 +33,11 @@ body: |
; CHECK-LABEL: name: shuffle_vector_to_concat_vector_230123
; CHECK: liveins: $d0, $d1
- ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
- ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[COPY1]](<2 x s32>), [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>)
- ; CHECK: RET_ReallyLR implicit [[CONCAT_VECTORS]](<6 x s32>)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[COPY1]](<2 x s32>), [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[CONCAT_VECTORS]](<6 x s32>)
%0:_(<2 x s32>) = COPY $d0
%1:_(<2 x s32>) = COPY $d1
%2:_(<6 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1(<2 x s32>), shufflemask(2,3,0,1,2,3)
@@ -52,10 +54,11 @@ body: |
; CHECK-LABEL: name: shuffle_vector_to_concat_vector_2undef01undef3
; CHECK: liveins: $d0, $d1
- ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
- ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[COPY1]](<2 x s32>), [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>)
- ; CHECK: RET_ReallyLR implicit [[CONCAT_VECTORS]](<6 x s32>)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[COPY1]](<2 x s32>), [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[CONCAT_VECTORS]](<6 x s32>)
%0:_(<2 x s32>) = COPY $d0
%1:_(<2 x s32>) = COPY $d1
%2:_(<6 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1(<2 x s32>), shufflemask(2,-1,0,1,-1,3)
@@ -72,10 +75,11 @@ body: |
; CHECK-LABEL: name: shuffle_vector_to_concat_vector_mixed_src_200123_neg
; CHECK: liveins: $d0, $d1
- ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
- ; CHECK: [[SHUF:%[0-9]+]]:_(<6 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(2, 0, 0, 1, 2, 3)
- ; CHECK: RET_ReallyLR implicit [[SHUF]](<6 x s32>)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<6 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(2, 0, 0, 1, 2, 3)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<6 x s32>)
%0:_(<2 x s32>) = COPY $d0
%1:_(<2 x s32>) = COPY $d1
%2:_(<6 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1(<2 x s32>), shufflemask(2,0,0,1,2,3)
@@ -92,11 +96,12 @@ body: |
; CHECK-LABEL: name: shuffle_vector_to_concat_vector_2undef1
; CHECK: liveins: $d0, $d1
- ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
- ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
- ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[COPY1]](<2 x s32>), [[DEF]](<2 x s32>), [[COPY]](<2 x s32>)
- ; CHECK: RET_ReallyLR implicit [[CONCAT_VECTORS]](<6 x s32>)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[COPY1]](<2 x s32>), [[DEF]](<2 x s32>), [[COPY]](<2 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[CONCAT_VECTORS]](<6 x s32>)
%0:_(<2 x s32>) = COPY $d0
%1:_(<2 x s32>) = COPY $d1
%2:_(<6 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1(<2 x s32>), shufflemask(2,-1,-1,-1,-1,1)
@@ -113,10 +118,11 @@ body: |
; CHECK-LABEL: name: shuffle_vector_to_concat_vector_src_flipped_230132_neg
; CHECK: liveins: $d0, $d1
- ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
- ; CHECK: [[SHUF:%[0-9]+]]:_(<6 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(2, 3, 0, 1, 3, 2)
- ; CHECK: RET_ReallyLR implicit [[SHUF]](<6 x s32>)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<6 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(2, 3, 0, 1, 3, 2)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<6 x s32>)
%0:_(<2 x s32>) = COPY $d0
%1:_(<2 x s32>) = COPY $d1
%2:_(<6 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1(<2 x s32>), shufflemask(2,3,0,1,3,2)
@@ -133,10 +139,11 @@ body: |
; CHECK-LABEL: name: shuffle_vector_to_concat_vector_src_flipped_23013undef_neg
; CHECK: liveins: $d0, $d1
- ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
- ; CHECK: [[SHUF:%[0-9]+]]:_(<6 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(2, 3, 0, 1, 3, undef)
- ; CHECK: RET_ReallyLR implicit [[SHUF]](<6 x s32>)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<6 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(2, 3, 0, 1, 3, undef)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<6 x s32>)
%0:_(<2 x s32>) = COPY $d0
%1:_(<2 x s32>) = COPY $d1
%2:_(<6 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1(<2 x s32>), shufflemask(2,3,0,1,3,-1)
@@ -154,10 +161,11 @@ body: |
; CHECK-LABEL: name: shuffle_vector_to_concat_vector_01234567
; CHECK: liveins: $q0, $q1
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
- ; CHECK: RET_ReallyLR implicit [[CONCAT_VECTORS]](<8 x s32>)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[CONCAT_VECTORS]](<8 x s32>)
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<8 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1(<4 x s32>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7)
@@ -174,10 +182,11 @@ body: |
; CHECK-LABEL: name: shuffle_vector_to_concat_vector_45670123
; CHECK: liveins: $q0, $q1
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[COPY1]](<4 x s32>), [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
- ; CHECK: RET_ReallyLR implicit [[CONCAT_VECTORS]](<12 x s32>)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[COPY1]](<4 x s32>), [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[CONCAT_VECTORS]](<12 x s32>)
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<12 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1(<4 x s32>), shufflemask(4,5,6,7,0,1,2,3,4,5,6,7)
@@ -194,10 +203,11 @@ body: |
; CHECK-LABEL: name: shuffle_vector_to_concat_vector_45undefundef0123undefundef67
; CHECK: liveins: $q0, $q1
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[COPY1]](<4 x s32>), [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
- ; CHECK: RET_ReallyLR implicit [[CONCAT_VECTORS]](<12 x s32>)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[COPY1]](<4 x s32>), [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[CONCAT_VECTORS]](<12 x s32>)
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<12 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1(<4 x s32>), shufflemask(4,5,-1,-1,0,1,2,3,-1,-1,6,7)
@@ -214,10 +224,11 @@ body: |
; CHECK-LABEL: name: shuffle_vector_to_concat_vector_mixed_src_456000123_neg
; CHECK: liveins: $q0, $q1
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK: [[SHUF:%[0-9]+]]:_(<8 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(4, 5, 6, 0, 0, 1, 2, 3)
- ; CHECK: RET_ReallyLR implicit [[SHUF]](<8 x s32>)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(4, 5, 6, 0, 0, 1, 2, 3)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<8 x s32>)
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<8 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1(<4 x s32>), shufflemask(4,5,6,0,0,1,2,3)
@@ -234,11 +245,12 @@ body: |
; CHECK-LABEL: name: shuffle_vector_to_concat_vector_45undefundefundefundefundefundefundefundef23
; CHECK: liveins: $q0, $q1
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[COPY1]](<4 x s32>), [[DEF]](<4 x s32>), [[COPY]](<4 x s32>)
- ; CHECK: RET_ReallyLR implicit [[CONCAT_VECTORS]](<12 x s32>)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[COPY1]](<4 x s32>), [[DEF]](<4 x s32>), [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[CONCAT_VECTORS]](<12 x s32>)
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<12 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1(<4 x s32>), shufflemask(4,5,-1,-1,-1,-1,-1,-1,-1,-1,2,3)
@@ -255,10 +267,11 @@ body: |
; CHECK-LABEL: name: shuffle_vector_to_concat_vector_4501_neg
; CHECK: liveins: $q0, $q1
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(4, 5, 0, 1)
- ; CHECK: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(4, 5, 0, 1)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1(<4 x s32>), shufflemask(4,5,0,1)
@@ -275,10 +288,11 @@ body: |
; CHECK-LABEL: name: shuffle_vector_to_concat_vector_src_flipped_4567012367undefundef_neg
; CHECK: liveins: $q0, $q1
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK: [[SHUF:%[0-9]+]]:_(<12 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(4, 5, 6, 7, 0, 1, 2, 3, 6, 7, undef, undef)
- ; CHECK: RET_ReallyLR implicit [[SHUF]](<12 x s32>)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<12 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(4, 5, 6, 7, 0, 1, 2, 3, 6, 7, undef, undef)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<12 x s32>)
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<12 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1(<4 x s32>), shufflemask(4,5,6,7,0,1,2,3,6,7,-1,-1)
@@ -295,10 +309,11 @@ body: |
; CHECK-LABEL: name: shuffle_vector_to_concat_vector_45undefundef0123undefundef67_ptr
; CHECK: liveins: $q0_q1, $q2_q3
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x p0>) = COPY $q0_q1
- ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x p0>) = COPY $q2_q3
- ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x p0>) = G_CONCAT_VECTORS [[COPY1]](<4 x p0>), [[COPY]](<4 x p0>), [[COPY1]](<4 x p0>)
- ; CHECK: RET_ReallyLR implicit [[CONCAT_VECTORS]](<12 x p0>)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x p0>) = COPY $q0_q1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x p0>) = COPY $q2_q3
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x p0>) = G_CONCAT_VECTORS [[COPY1]](<4 x p0>), [[COPY]](<4 x p0>), [[COPY1]](<4 x p0>)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[CONCAT_VECTORS]](<12 x p0>)
%0:_(<4 x p0>) = COPY $q0_q1
%1:_(<4 x p0>) = COPY $q2_q3
%2:_(<12 x p0>) = G_SHUFFLE_VECTOR %0(<4 x p0>), %1(<4 x p0>), shufflemask(4,5,-1,-1,0,1,2,3,-1,-1,6,7)
@@ -315,10 +330,11 @@ body: |
; CHECK-LABEL: name: shuffle_vector_to_concat_vector_mixed_src_456000123_neg_ptr
; CHECK: liveins: $q0_q1, $q2_q3
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x p0>) = COPY $q0_q1
- ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x p0>) = COPY $q2_q3
- ; CHECK: [[SHUF:%[0-9]+]]:_(<8 x p0>) = G_SHUFFLE_VECTOR [[COPY]](<4 x p0>), [[COPY1]], shufflemask(4, 5, 6, 0, 0, 1, 2, 3)
- ; CHECK: RET_ReallyLR implicit [[SHUF]](<8 x p0>)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x p0>) = COPY $q0_q1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x p0>) = COPY $q2_q3
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x p0>) = G_SHUFFLE_VECTOR [[COPY]](<4 x p0>), [[COPY1]], shufflemask(4, 5, 6, 0, 0, 1, 2, 3)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<8 x p0>)
%0:_(<4 x p0>) = COPY $q0_q1
%1:_(<4 x p0>) = COPY $q2_q3
%2:_(<8 x p0>) = G_SHUFFLE_VECTOR %0(<4 x p0>), %1(<4 x p0>), shufflemask(4,5,6,0,0,1,2,3)
@@ -336,12 +352,13 @@ body: |
; CHECK-LABEL: name: shuffle_vector_to_build_vector_ptr
; CHECK: liveins: $x0, $x1, $x2, $x3
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
- ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
- ; CHECK: [[COPY3:%[0-9]+]]:_(p0) = COPY $x3
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x p0>) = G_BUILD_VECTOR [[COPY]](p0), [[COPY1]](p0), [[COPY2]](p0), [[COPY3]](p0)
- ; CHECK: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x p0>)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $x3
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x p0>) = G_BUILD_VECTOR [[COPY]](p0), [[COPY1]](p0), [[COPY2]](p0), [[COPY3]](p0)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x p0>)
%0:_(p0) = COPY $x0
%1:_(p0) = COPY $x1
%2:_(p0) = COPY $x2
@@ -362,10 +379,11 @@ body: |
; CHECK-LABEL: name: shuffle_vector_on_scalars_to_build_vector_ptr
; CHECK: liveins: $x0, $x1
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x p0>) = G_BUILD_VECTOR [[COPY]](p0), [[COPY1]](p0), [[COPY]](p0), [[COPY1]](p0)
- ; CHECK: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x p0>)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x p0>) = G_BUILD_VECTOR [[COPY]](p0), [[COPY1]](p0), [[COPY]](p0), [[COPY1]](p0)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x p0>)
%0:_(p0) = COPY $x0
%1:_(p0) = COPY $x1
%6:_(<4 x p0>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0,1,0,1)
@@ -383,10 +401,11 @@ body: |
; CHECK-LABEL: name: shuffle_vector_on_scalars_to_build_vector_swap_ptr
; CHECK: liveins: $x0, $x1
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[COPY1]](p0), [[COPY]](p0)
- ; CHECK: RET_ReallyLR implicit [[BUILD_VECTOR]](<2 x p0>)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[COPY1]](p0), [[COPY]](p0)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<2 x p0>)
%0:_(p0) = COPY $x0
%1:_(p0) = COPY $x1
%6:_(<2 x p0>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1,0)
@@ -404,11 +423,12 @@ body: |
; CHECK-LABEL: name: shuffle_vector_on_scalars_to_build_vector_with_undef
; CHECK: liveins: $x0, $x1
- ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
- ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[DEF]](s64), [[DEF]](s64), [[COPY1]](s64)
- ; CHECK: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s64>)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[DEF]](s64), [[DEF]](s64), [[COPY1]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s64>)
%0:_(s64) = COPY $x0
%1:_(s64) = COPY $x1
%6:_(<4 x s64>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0,-1,-1,1)
@@ -427,32 +447,48 @@ body: |
; CHECK-LABEL: name: shuffle_vector_on_scalars_to_copy_ptr
; CHECK: liveins: $x0
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: RET_ReallyLR implicit [[COPY]](p0)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: RET_ReallyLR implicit [[COPY]](p0)
%0:_(p0) = COPY $x0
%6:_(p0) = G_SHUFFLE_VECTOR %0, %0, shufflemask(0)
RET_ReallyLR implicit %6
...
-
-# Check that shuffle_vector on vector doesn't get combined
-# when the resulting type is a scalar.
-# We should be able to replace this by an extract vector element,
-# but that's not implemented yet.
---
-name: shuffle_vector_to_copy_neg
+name: shuffle_vector_to_copy_lhs
tracksRegLiveness: true
body: |
bb.1:
liveins: $x0, $x1
- ; CHECK-LABEL: name: shuffle_vector_to_copy_neg
+ ; CHECK-LABEL: name: shuffle_vector_to_copy_lhs
; CHECK: liveins: $x0, $x1
- ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $x1
- ; CHECK: [[SHUF:%[0-9]+]]:_(s32) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(1)
- ; CHECK: RET_ReallyLR implicit [[SHUF]](s32)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[EVEC]](s32)
%0:_(<2 x s32>) = COPY $x0
%1:_(<2 x s32>) = COPY $x1
%6:_(s32) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1)
RET_ReallyLR implicit %6
...
+---
+name: shuffle_vector_to_copy_rhs
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: shuffle_vector_to_copy_rhs
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $x1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[EVEC]](s32)
+ %0:_(<2 x s32>) = COPY $x0
+ %1:_(<2 x s32>) = COPY $x1
+ %6:_(s32) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2)
+ RET_ReallyLR implicit %6
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
index e94aac3b59c69a..eacb7731eced4e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
@@ -46,7 +46,8 @@ define <2 x double> @test_vcvt_high_v1f64_f32_bitcast(<4 x float> %x) nounwind r
;
; GISEL-LABEL: test_vcvt_high_v1f64_f32_bitcast:
; GISEL: // %bb.0:
-; GISEL-NEXT: fcvtl2 v0.2d, v0.4s
+; GISEL-NEXT: mov d0, v0[1]
+; GISEL-NEXT: fcvtl v0.2d, v0.2s
; GISEL-NEXT: ret
%bc1 = bitcast <4 x float> %x to <2 x double>
%ext = shufflevector <2 x double> %bc1, <2 x double> undef, <1 x i32> <i32 1>
@@ -63,7 +64,8 @@ define <2 x double> @test_vcvt_high_v1i64_f32_bitcast(<2 x i64> %x) nounwind rea
;
; GISEL-LABEL: test_vcvt_high_v1i64_f32_bitcast:
; GISEL: // %bb.0:
-; GISEL-NEXT: fcvtl2 v0.2d, v0.4s
+; GISEL-NEXT: mov d0, v0[1]
+; GISEL-NEXT: fcvtl v0.2d, v0.2s
; GISEL-NEXT: ret
%ext = shufflevector <2 x i64> %x, <2 x i64> undef, <1 x i32> <i32 1>
%bc2 = bitcast <1 x i64> %ext to <2 x float>
@@ -129,7 +131,8 @@ define <4 x float> @test_vcvt_high_v1i64_f16_bitcast(<2 x i64> %x) nounwind read
;
; GISEL-LABEL: test_vcvt_high_v1i64_f16_bitcast:
; GISEL: // %bb.0:
-; GISEL-NEXT: fcvtl2 v0.4s, v0.8h
+; GISEL-NEXT: mov d0, v0[1]
+; GISEL-NEXT: fcvtl v0.4s, v0.4h
; GISEL-NEXT: ret
%ext = shufflevector <2 x i64> %x, <2 x i64> undef, <1 x i32> <i32 1>
%bc2 = bitcast <1 x i64> %ext to <4 x half>
diff --git a/llvm/test/CodeGen/AArch64/ext-narrow-index.ll b/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
index b296a79ce4f406..00d89271270508 100644
--- a/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
+++ b/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefix=CHECK
+; RUN: llc < %s -global-isel -mtriple=aarch64 | FileCheck %s --check-prefix=CHECK-GISEL
; Tests of shufflevector where the index operand is half the width of the vector
; operands. We should get one ext instruction and not two.
@@ -10,6 +11,11 @@ define <8 x i8> @i8_off0(<16 x i8> %arg1, <16 x i8> %arg2) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i8_off0:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i8> %shuffle
@@ -21,6 +27,12 @@ define <8 x i8> @i8_off1(<16 x i8> %arg1, <16 x i8> %arg2) {
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #1
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i8_off1:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #1
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
ret <8 x i8> %shuffle
@@ -32,6 +44,12 @@ define <8 x i8> @i8_off8(<16 x i8> %arg1, <16 x i8> %arg2) {
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i8_off8:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #8
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <8 x i8> %shuffle
@@ -43,6 +61,12 @@ define <8 x i8> @i8_off15(<16 x i8> %arg1, <16 x i8> %arg2) {
; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #15
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i8_off15:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #15
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
ret <8 x i8> %shuffle
@@ -54,6 +78,12 @@ define <8 x i8> @i8_off22(<16 x i8> %arg1, <16 x i8> %arg2) {
; CHECK-NEXT: ext v0.16b, v1.16b, v1.16b, #6
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i8_off22:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: ext v0.16b, v1.16b, v0.16b, #6
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29>
ret <8 x i8> %shuffle
@@ -65,6 +95,11 @@ define <4 x i16> @i16_off0(<8 x i16> %arg1, <8 x i16> %arg2) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i16_off0:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i16> %shuffle
@@ -76,6 +111,12 @@ define <4 x i16> @i16_off1(<8 x i16> %arg1, <8 x i16> %arg2) {
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #2
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i16_off1:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #2
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
ret <4 x i16> %shuffle
@@ -87,6 +128,12 @@ define <4 x i16> @i16_off7(<8 x i16> %arg1, <8 x i16> %arg2) {
; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #14
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i16_off7:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #14
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 7, i32 8, i32 9, i32 10>
ret <4 x i16> %shuffle
@@ -98,6 +145,12 @@ define <4 x i16> @i16_off8(<8 x i16> %arg1, <8 x i16> %arg2) {
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i16_off8:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: mov v0.16b, v1.16b
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
ret <4 x i16> %shuffle
@@ -109,6 +162,11 @@ define <2 x i32> @i32_off0(<4 x i32> %arg1, <4 x i32> %arg2) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i32_off0:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 0, i32 1>
ret <2 x i32> %shuffle
@@ -120,6 +178,12 @@ define <2 x i32> @i32_off1(<4 x i32> %arg1, <4 x i32> %arg2) {
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #4
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i32_off1:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #4
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 1, i32 2>
ret <2 x i32> %shuffle
@@ -131,6 +195,12 @@ define <2 x i32> @i32_off3(<4 x i32> %arg1, <4 x i32> %arg2) {
; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #12
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i32_off3:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #12
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 3, i32 4>
ret <2 x i32> %shuffle
@@ -142,6 +212,12 @@ define <2 x i32> @i32_off4(<4 x i32> %arg1, <4 x i32> %arg2) {
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i32_off4:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: mov v0.16b, v1.16b
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 4, i32 5>
ret <2 x i32> %shuffle
@@ -153,6 +229,11 @@ define <1 x i64> @i64_off0(<2 x i64> %arg1, <2 x i64> %arg2) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i64_off0:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <2 x i64> %arg1, <2 x i64> %arg2, <1 x i32> <i32 0>
ret <1 x i64> %shuffle
@@ -164,6 +245,11 @@ define <1 x i64> @i64_off1(<2 x i64> %arg1, <2 x i64> %arg2) {
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i64_off1:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: mov d0, v0.d[1]
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <2 x i64> %arg1, <2 x i64> %arg2, <1 x i32> <i32 1>
ret <1 x i64> %shuffle
@@ -175,6 +261,12 @@ define <1 x i64> @i64_off2(<2 x i64> %arg1, <2 x i64> %arg2) {
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i64_off2:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: mov v0.16b, v1.16b
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <2 x i64> %arg1, <2 x i64> %arg2, <1 x i32> <i32 2>
ret <1 x i64> %shuffle
@@ -186,6 +278,11 @@ define <8 x i8> @i8_zero_off0(<16 x i8> %arg1) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i8_zero_off0:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i8> %shuffle
@@ -197,6 +294,13 @@ define <8 x i8> @i8_zero_off1(<16 x i8> %arg1) {
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #1
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i8_zero_off1:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
+; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #1
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
ret <8 x i8> %shuffle
@@ -208,6 +312,13 @@ define <8 x i8> @i8_zero_off8(<16 x i8> %arg1) {
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i8_zero_off8:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
+; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #8
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <8 x i8> %shuffle
@@ -220,6 +331,13 @@ define <8 x i8> @i8_zero_off15(<16 x i8> %arg1) {
; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #15
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i8_zero_off15:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
+; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #15
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
ret <8 x i8> %shuffle
@@ -230,6 +348,13 @@ define <8 x i8> @i8_zero_off22(<16 x i8> %arg1) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i8_zero_off22:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
+; CHECK-GISEL-NEXT: ext v0.16b, v1.16b, v0.16b, #6
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29>
ret <8 x i8> %shuffle
@@ -241,6 +366,11 @@ define <4 x i16> @i16_zero_off0(<8 x i16> %arg1) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i16_zero_off0:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i16> %shuffle
@@ -252,6 +382,13 @@ define <4 x i16> @i16_zero_off1(<8 x i16> %arg1) {
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #2
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i16_zero_off1:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
+; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #2
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
ret <4 x i16> %shuffle
@@ -264,6 +401,13 @@ define <4 x i16> @i16_zero_off7(<8 x i16> %arg1) {
; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #14
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i16_zero_off7:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
+; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #14
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 7, i32 8, i32 9, i32 10>
ret <4 x i16> %shuffle
@@ -274,6 +418,11 @@ define <4 x i16> @i16_zero_off8(<8 x i16> %arg1) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i16_zero_off8:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: movi v0.2d, #0000000000000000
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
ret <4 x i16> %shuffle
@@ -285,6 +434,11 @@ define <2 x i32> @i32_zero_off0(<4 x i32> %arg1) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i32_zero_off0:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1>
ret <2 x i32> %shuffle
@@ -296,6 +450,13 @@ define <2 x i32> @i32_zero_off1(<4 x i32> %arg1) {
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #4
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i32_zero_off1:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
+; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #4
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 1, i32 2>
ret <2 x i32> %shuffle
@@ -308,6 +469,13 @@ define <2 x i32> @i32_zero_off3(<4 x i32> %arg1) {
; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #12
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i32_zero_off3:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
+; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #12
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 3, i32 4>
ret <2 x i32> %shuffle
@@ -318,6 +486,11 @@ define <2 x i32> @i32_zero_off4(<4 x i32> %arg1) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i32_zero_off4:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: movi v0.2d, #0000000000000000
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 4, i32 5>
ret <2 x i32> %shuffle
@@ -329,6 +502,11 @@ define <1 x i64> @i64_zero_off0(<2 x i64> %arg1) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i64_zero_off0:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <2 x i64> %arg1, <2 x i64> zeroinitializer, <1 x i32> <i32 0>
ret <1 x i64> %shuffle
@@ -340,6 +518,11 @@ define <1 x i64> @i64_zero_off1(<2 x i64> %arg1) {
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i64_zero_off1:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: mov d0, v0.d[1]
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <2 x i64> %arg1, <2 x i64> zeroinitializer, <1 x i32> <i32 1>
ret <1 x i64> %shuffle
@@ -350,6 +533,11 @@ define <1 x i64> @i64_zero_off2(<2 x i64> %arg1) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmov d0, xzr
; CHECK-NEXT: ret
+;
+; CHECK-GISEL-LABEL: i64_zero_off2:
+; CHECK-GISEL: // %bb.0: // %entry
+; CHECK-GISEL-NEXT: fmov d0, xzr
+; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <2 x i64> %arg1, <2 x i64> zeroinitializer, <1 x i32> <i32 2>
ret <1 x i64> %shuffle
>From af84b51d7c9699820d9225afe7d8b4309efad959 Mon Sep 17 00:00:00 2001
From: Vladislav Dzhidzhoev <vdzhidzhoev at accesssoftek.com>
Date: Tue, 5 Sep 2023 18:19:04 +0200
Subject: [PATCH 2/4] Addressed @jayfoad comment: simplified match function.
---
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index e43e65b6e64750..60885fedff58f1 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -398,14 +398,9 @@ void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
bool CombinerHelper::matchShuffleToExtract(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
"Invalid instruction kind");
- ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
- LLT Src1Ty = MRI.getType(MI.getOperand(1).getReg());
- LLT Src2Ty = MRI.getType(MI.getOperand(2).getReg());
- int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
- int Src2NumElts = Src2Ty.isVector() ? Src2Ty.getNumElements() : 1;
- return Mask.size() == 1 && Mask[0] >= 0 &&
- Mask[0] < Src1NumElts + Src2NumElts;
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+ return Mask.size() == 1 && Mask[0] >= 0;
}
void CombinerHelper::applyShuffleToExtract(MachineInstr &MI) {
>From 8eb3383ede7c714d69bfaa9094c9e1b07ece1e75 Mon Sep 17 00:00:00 2001
From: Vladislav Dzhidzhoev <vdzhidzhoev at accesssoftek.com>
Date: Tue, 5 Sep 2023 18:25:48 +0200
Subject: [PATCH 3/4] Addressed @davemgreen comment: regenerate
ext-narrow-index.ll
---
llvm/test/CodeGen/AArch64/ext-narrow-index.ll | 234 +++++-------------
1 file changed, 61 insertions(+), 173 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/ext-narrow-index.ll b/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
index 00d89271270508..2c5d33da93c863 100644
--- a/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
+++ b/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefix=CHECK
-; RUN: llc < %s -global-isel -mtriple=aarch64 | FileCheck %s --check-prefix=CHECK-GISEL
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -global-isel -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-GISEL
; Tests of shufflevector where the index operand is half the width of the vector
; operands. We should get one ext instruction and not two.
@@ -11,22 +11,17 @@ define <8 x i8> @i8_off0(<16 x i8> %arg1, <16 x i8> %arg2) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i8_off0:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i8> %shuffle
}
define <8 x i8> @i8_off1(<16 x i8> %arg1, <16 x i8> %arg2) {
-; CHECK-LABEL: i8_off1:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #1
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: i8_off1:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #1
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
;
; CHECK-GISEL-LABEL: i8_off1:
; CHECK-GISEL: // %bb.0: // %entry
@@ -39,11 +34,11 @@ entry:
}
define <8 x i8> @i8_off8(<16 x i8> %arg1, <16 x i8> %arg2) {
-; CHECK-LABEL: i8_off8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: i8_off8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
;
; CHECK-GISEL-LABEL: i8_off8:
; CHECK-GISEL: // %bb.0: // %entry
@@ -61,23 +56,17 @@ define <8 x i8> @i8_off15(<16 x i8> %arg1, <16 x i8> %arg2) {
; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #15
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i8_off15:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #15
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
ret <8 x i8> %shuffle
}
define <8 x i8> @i8_off22(<16 x i8> %arg1, <16 x i8> %arg2) {
-; CHECK-LABEL: i8_off22:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ext v0.16b, v1.16b, v1.16b, #6
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: i8_off22:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ext v0.16b, v1.16b, v1.16b, #6
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
;
; CHECK-GISEL-LABEL: i8_off22:
; CHECK-GISEL: // %bb.0: // %entry
@@ -95,22 +84,17 @@ define <4 x i16> @i16_off0(<8 x i16> %arg1, <8 x i16> %arg2) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i16_off0:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i16> %shuffle
}
define <4 x i16> @i16_off1(<8 x i16> %arg1, <8 x i16> %arg2) {
-; CHECK-LABEL: i16_off1:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #2
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: i16_off1:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #2
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
;
; CHECK-GISEL-LABEL: i16_off1:
; CHECK-GISEL: // %bb.0: // %entry
@@ -128,12 +112,6 @@ define <4 x i16> @i16_off7(<8 x i16> %arg1, <8 x i16> %arg2) {
; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #14
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i16_off7:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #14
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 7, i32 8, i32 9, i32 10>
ret <4 x i16> %shuffle
@@ -145,12 +123,6 @@ define <4 x i16> @i16_off8(<8 x i16> %arg1, <8 x i16> %arg2) {
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i16_off8:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: mov v0.16b, v1.16b
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
ret <4 x i16> %shuffle
@@ -162,22 +134,17 @@ define <2 x i32> @i32_off0(<4 x i32> %arg1, <4 x i32> %arg2) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i32_off0:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 0, i32 1>
ret <2 x i32> %shuffle
}
define <2 x i32> @i32_off1(<4 x i32> %arg1, <4 x i32> %arg2) {
-; CHECK-LABEL: i32_off1:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #4
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: i32_off1:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #4
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
;
; CHECK-GISEL-LABEL: i32_off1:
; CHECK-GISEL: // %bb.0: // %entry
@@ -195,12 +162,6 @@ define <2 x i32> @i32_off3(<4 x i32> %arg1, <4 x i32> %arg2) {
; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #12
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i32_off3:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #12
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 3, i32 4>
ret <2 x i32> %shuffle
@@ -212,12 +173,6 @@ define <2 x i32> @i32_off4(<4 x i32> %arg1, <4 x i32> %arg2) {
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i32_off4:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: mov v0.16b, v1.16b
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 4, i32 5>
ret <2 x i32> %shuffle
@@ -229,22 +184,17 @@ define <1 x i64> @i64_off0(<2 x i64> %arg1, <2 x i64> %arg2) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i64_off0:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <2 x i64> %arg1, <2 x i64> %arg2, <1 x i32> <i32 0>
ret <1 x i64> %shuffle
}
define <1 x i64> @i64_off1(<2 x i64> %arg1, <2 x i64> %arg2) {
-; CHECK-LABEL: i64_off1:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: i64_off1:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
;
; CHECK-GISEL-LABEL: i64_off1:
; CHECK-GISEL: // %bb.0: // %entry
@@ -261,12 +211,6 @@ define <1 x i64> @i64_off2(<2 x i64> %arg1, <2 x i64> %arg2) {
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i64_off2:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: mov v0.16b, v1.16b
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <2 x i64> %arg1, <2 x i64> %arg2, <1 x i32> <i32 2>
ret <1 x i64> %shuffle
@@ -278,22 +222,17 @@ define <8 x i8> @i8_zero_off0(<16 x i8> %arg1) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i8_zero_off0:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i8> %shuffle
}
define <8 x i8> @i8_zero_off1(<16 x i8> %arg1) {
-; CHECK-LABEL: i8_zero_off1:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #1
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: i8_zero_off1:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #1
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
;
; CHECK-GISEL-LABEL: i8_zero_off1:
; CHECK-GISEL: // %bb.0: // %entry
@@ -307,11 +246,11 @@ entry:
}
define <8 x i8> @i8_zero_off8(<16 x i8> %arg1) {
-; CHECK-LABEL: i8_zero_off8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: i8_zero_off8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
;
; CHECK-GISEL-LABEL: i8_zero_off8:
; CHECK-GISEL: // %bb.0: // %entry
@@ -331,23 +270,16 @@ define <8 x i8> @i8_zero_off15(<16 x i8> %arg1) {
; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #15
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i8_zero_off15:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
-; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #15
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
ret <8 x i8> %shuffle
}
define <8 x i8> @i8_zero_off22(<16 x i8> %arg1) {
-; CHECK-LABEL: i8_zero_off22:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: i8_zero_off22:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT: ret
;
; CHECK-GISEL-LABEL: i8_zero_off22:
; CHECK-GISEL: // %bb.0: // %entry
@@ -366,22 +298,17 @@ define <4 x i16> @i16_zero_off0(<8 x i16> %arg1) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i16_zero_off0:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i16> %shuffle
}
define <4 x i16> @i16_zero_off1(<8 x i16> %arg1) {
-; CHECK-LABEL: i16_zero_off1:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #2
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: i16_zero_off1:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #2
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
;
; CHECK-GISEL-LABEL: i16_zero_off1:
; CHECK-GISEL: // %bb.0: // %entry
@@ -401,13 +328,6 @@ define <4 x i16> @i16_zero_off7(<8 x i16> %arg1) {
; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #14
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i16_zero_off7:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
-; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #14
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 7, i32 8, i32 9, i32 10>
ret <4 x i16> %shuffle
@@ -418,11 +338,6 @@ define <4 x i16> @i16_zero_off8(<8 x i16> %arg1) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i16_zero_off8:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: movi v0.2d, #0000000000000000
-; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
ret <4 x i16> %shuffle
@@ -434,22 +349,17 @@ define <2 x i32> @i32_zero_off0(<4 x i32> %arg1) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i32_zero_off0:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1>
ret <2 x i32> %shuffle
}
define <2 x i32> @i32_zero_off1(<4 x i32> %arg1) {
-; CHECK-LABEL: i32_zero_off1:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #4
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: i32_zero_off1:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #4
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
;
; CHECK-GISEL-LABEL: i32_zero_off1:
; CHECK-GISEL: // %bb.0: // %entry
@@ -469,13 +379,6 @@ define <2 x i32> @i32_zero_off3(<4 x i32> %arg1) {
; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #12
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i32_zero_off3:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
-; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #12
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 3, i32 4>
ret <2 x i32> %shuffle
@@ -486,11 +389,6 @@ define <2 x i32> @i32_zero_off4(<4 x i32> %arg1) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i32_zero_off4:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: movi v0.2d, #0000000000000000
-; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 4, i32 5>
ret <2 x i32> %shuffle
@@ -502,22 +400,17 @@ define <1 x i64> @i64_zero_off0(<2 x i64> %arg1) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i64_zero_off0:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <2 x i64> %arg1, <2 x i64> zeroinitializer, <1 x i32> <i32 0>
ret <1 x i64> %shuffle
}
define <1 x i64> @i64_zero_off1(<2 x i64> %arg1) {
-; CHECK-LABEL: i64_zero_off1:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: i64_zero_off1:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
;
; CHECK-GISEL-LABEL: i64_zero_off1:
; CHECK-GISEL: // %bb.0: // %entry
@@ -533,11 +426,6 @@ define <1 x i64> @i64_zero_off2(<2 x i64> %arg1) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmov d0, xzr
; CHECK-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i64_zero_off2:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: fmov d0, xzr
-; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <2 x i64> %arg1, <2 x i64> zeroinitializer, <1 x i32> <i32 2>
ret <1 x i64> %shuffle
>From d8a644028078848dade5c0fcbd568f4d067a06c2 Mon Sep 17 00:00:00 2001
From: Vladislav Dzhidzhoev <vdzhidzhoev at accesssoftek.com>
Date: Wed, 6 Sep 2023 18:12:13 +0200
Subject: [PATCH 4/4] Addressed @jayfoad comment: support negative mask elt
---
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 20 +++++++++----------
.../prelegalizercombiner-shuffle-vector.mir | 17 ++++++++++++++++
2 files changed, 27 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 60885fedff58f1..836a79607cef87 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -400,32 +400,32 @@ bool CombinerHelper::matchShuffleToExtract(MachineInstr &MI) {
"Invalid instruction kind");
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
- return Mask.size() == 1 && Mask[0] >= 0;
+ return Mask.size() == 1;
}
void CombinerHelper::applyShuffleToExtract(MachineInstr &MI) {
Register DstReg = MI.getOperand(0).getReg();
Builder.setInsertPt(*MI.getParent(), MI);
- Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
int I = MI.getOperand(3).getShuffleMask()[0];
Register Src1 = MI.getOperand(1).getReg();
LLT Src1Ty = MRI.getType(Src1);
int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
Register SrcReg;
- if (I < Src1NumElts)
- SrcReg = Src1;
- else {
+ if (I >= Src1NumElts) {
SrcReg = MI.getOperand(2).getReg();
I -= Src1NumElts;
- }
- if (!MRI.getType(SrcReg).isVector())
- Builder.buildCopy(NewDstReg, SrcReg);
+ } else if (I >= 0)
+ SrcReg = Src1;
+
+ if (I < 0)
+ Builder.buildUndef(DstReg);
+ else if (!MRI.getType(SrcReg).isVector())
+ Builder.buildCopy(DstReg, SrcReg);
else
- Builder.buildExtractVectorElementConstant(NewDstReg, SrcReg, I);
+ Builder.buildExtractVectorElementConstant(DstReg, SrcReg, I);
MI.eraseFromParent();
- replaceRegWith(MRI, DstReg, NewDstReg);
}
namespace {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir
index 1873f58be6f886..2c9ae5b06b62e4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir
@@ -492,3 +492,20 @@ body: |
%6:_(s32) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2)
RET_ReallyLR implicit %6
...
+---
+name: shuffle_vector_to_copy_undef
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: shuffle_vector_to_copy_undef
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: RET_ReallyLR implicit [[DEF]](s32)
+ %0:_(<2 x s32>) = COPY $x0
+ %1:_(<2 x s32>) = COPY $x1
+ %6:_(s32) = G_SHUFFLE_VECTOR %0, %1, shufflemask(-1)
+ RET_ReallyLR implicit %6
+...
More information about the llvm-commits
mailing list