[llvm] [GlobalISel] Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not reference a (PR #115377)
Konstantin Schwarz via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 7 14:06:02 PST 2024
https://github.com/konstantinschwarz created https://github.com/llvm/llvm-project/pull/115377
None
>From 6ef798194bb3026ca03cef1a685077220d86f157 Mon Sep 17 00:00:00 2001
From: Konstantin Schwarz <konstantin.schwarz at amd.com>
Date: Tue, 5 Nov 2024 21:26:26 +0000
Subject: [PATCH] [GlobalISel] Turn shuffle a, b, mask -> shuffle undef, b,
mask iff mask does not reference a
---
.../llvm/CodeGen/GlobalISel/CombinerHelper.h | 4 +
.../include/llvm/Target/GlobalISel/Combine.td | 10 +-
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 50 ++++++++
.../GlobalISel/combine-extract-vec-elt.mir | 4 +-
.../AArch64/GlobalISel/combine-freeze.mir | 8 +-
...rcombiner-shuffle-vector-disjoint-mask.mir | 101 ++++++++++++++++
.../AArch64/aarch64-matrix-umull-smull.ll | 12 +-
llvm/test/CodeGen/AArch64/aarch64-smull.ll | 3 +-
llvm/test/CodeGen/AArch64/ext-narrow-index.ll | 110 ++++++------------
9 files changed, 211 insertions(+), 91 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-disjoint-mask.mir
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 72573facf1a7fe..f6af149708bc7a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -871,6 +871,10 @@ class CombinerHelper {
/// Remove references to rhs if it is undef
bool matchShuffleUndefRHS(MachineInstr &MI, BuildFnTy &MatchInfo);
+ /// Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not
+ /// reference a.
+ bool matchShuffleDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo);
+
/// Use a function which takes in a MachineIRBuilder to perform a combine.
/// By default, it erases the instruction def'd on \p MO from the function.
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 5928b369913916..6cf78cf013d498 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1577,6 +1577,13 @@ def combine_shuffle_undef_rhs : GICombineRule<
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
>;
+def combine_shuffle_disjoint_mask : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+ [{ return Helper.matchShuffleDisjointMask(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
+>;
+
// match_extract_of_element and insert_vector_elt_oob must be the first!
def vector_ops_combines: GICombineGroup<[
match_extract_of_element_undef_vector,
@@ -1927,7 +1934,8 @@ def constant_fold_binops : GICombineGroup<[constant_fold_binop,
def prefer_sign_combines : GICombineGroup<[nneg_zext]>;
def shuffle_combines : GICombineGroup<[combine_shuffle_concat,
- combine_shuffle_undef_rhs]>;
+ combine_shuffle_undef_rhs,
+ combine_shuffle_disjoint_mask]>;
def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
vector_ops_combines, freeze_combines, cast_combines,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 3b648a7e3f4472..5d31ce940f31a2 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -7726,3 +7726,53 @@ bool CombinerHelper::matchShuffleUndefRHS(MachineInstr &MI,
return true;
}
+
+bool CombinerHelper::matchShuffleDisjointMask(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+
+ auto &Shuffle = cast<GShuffleVector>(MI);
+ // If any of the two inputs is already undef, don't check the mask again to
+ // prevent infinite loop
+ if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc1Reg(), MRI))
+ return false;
+
+ if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc2Reg(), MRI))
+ return false;
+
+ ArrayRef<int> Mask = Shuffle.getMask();
+ const LLT Src1Ty = MRI.getType(Shuffle.getSrc1Reg());
+
+ const unsigned NumSrcElems = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
+
+ bool TouchesSrc1 = false;
+ bool TouchesSrc2 = false;
+ const unsigned NumElems = Mask.size();
+ for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
+ if (Mask[Idx] < 0)
+ continue;
+
+ if (Mask[Idx] < (int)NumSrcElems)
+ TouchesSrc1 = true;
+ else
+ TouchesSrc2 = true;
+ }
+
+ if (!(TouchesSrc1 ^ TouchesSrc2))
+ return false;
+
+ MatchInfo = [=, &Shuffle](MachineIRBuilder &B) {
+ Register Undef = B.buildUndef(Src1Ty).getReg(0);
+ Register NewSrc1;
+ Register NewSrc2;
+ if (TouchesSrc1) {
+ NewSrc1 = Shuffle.getSrc1Reg();
+ NewSrc2 = Undef;
+ } else {
+ NewSrc1 = Undef;
+ NewSrc2 = Shuffle.getSrc2Reg();
+ }
+ B.buildShuffleVector(Shuffle.getReg(0), NewSrc1, NewSrc2, Mask);
+ };
+
+ return true;
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
index 83b5c388520eb3..e2933690c7c55a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
@@ -598,9 +598,9 @@ body: |
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %arg1:_(<4 x s32>) = COPY $q0
- ; CHECK-NEXT: %arg2:_(<4 x s32>) = COPY $q1
; CHECK-NEXT: %idx:_(s64) = COPY $x1
- ; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2, shufflemask(undef, 0, 0, 0)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), [[DEF]], shufflemask(undef, 0, 0, 0)
; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT %sv(<4 x s32>), %idx(s64)
; CHECK-NEXT: $w0 = COPY %extract(s32)
; CHECK-NEXT: RET_ReallyLR implicit $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
index fa725ad0c5fb41..6b84a8488e478d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
@@ -1395,9 +1395,9 @@ body: |
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %arg1:_(<4 x s32>) = COPY $q0
- ; CHECK-NEXT: %arg2:_(<4 x s32>) = COPY $q1
; CHECK-NEXT: %idx:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2, shufflemask(3, 0, 0, 0)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), [[DEF]], shufflemask(3, 0, 0, 0)
; CHECK-NEXT: %freeze_sv:_(<4 x s32>) = G_FREEZE %sv
; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT %freeze_sv(<4 x s32>), %idx(s64)
; CHECK-NEXT: $w0 = COPY %extract(s32)
@@ -1422,9 +1422,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %elt:_(s32) = COPY $w0
; CHECK-NEXT: %arg1:_(<4 x s32>) = COPY $q0
- ; CHECK-NEXT: %arg2:_(<4 x s32>) = COPY $q1
; CHECK-NEXT: %idx:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2, shufflemask(3, 0, 0, 0)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), [[DEF]], shufflemask(3, 0, 0, 0)
; CHECK-NEXT: %freeze_sv:_(<4 x s32>) = G_FREEZE %sv
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE %elt
; CHECK-NEXT: %extract:_(<4 x s32>) = G_INSERT_VECTOR_ELT %freeze_sv, [[FREEZE]](s32), %idx(s64)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-disjoint-mask.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-disjoint-mask.mir
new file mode 100644
index 00000000000000..e2ed8ac8e7150e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-disjoint-mask.mir
@@ -0,0 +1,101 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-apple-ios -run-pass=aarch64-prelegalizer-combiner %s -o - | FileCheck %s
+
+---
+name: shuffle_vector_unused_lhs
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $d0, $d1
+
+ ; CHECK-LABEL: name: shuffle_vector_unused_lhs
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[DEF]](<2 x s32>), [[COPY]], shufflemask(3, 2, 3, 2)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
+ %0:_(<2 x s32>) = COPY $d0
+ %1:_(<2 x s32>) = COPY $d1
+ %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1(<2 x s32>), shufflemask(3, 2, 3, 2)
+ RET_ReallyLR implicit %2
+...
+
+---
+name: shuffle_vector_unused_rhs
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $d0, $d1
+
+ ; CHECK-LABEL: name: shuffle_vector_unused_rhs
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[DEF]], shufflemask(0, 0, 1, 1)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
+ %0:_(<2 x s32>) = COPY $d0
+ %1:_(<2 x s32>) = COPY $d1
+ %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0,0,1,1)
+ RET_ReallyLR implicit %2
+...
+
+---
+name: shuffle_vector_both_used
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $d0, $d1
+
+ ; CHECK-LABEL: name: shuffle_vector_both_used
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(0, 2, 1, 3)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
+ %0:_(<2 x s32>) = COPY $d0
+ %1:_(<2 x s32>) = COPY $d1
+ %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0,2,1,3)
+ RET_ReallyLR implicit %2
+...
+
+---
+name: shuffle_vector_undef_elems
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $d0, $d1
+
+ ; CHECK-LABEL: name: shuffle_vector_undef_elems
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[DEF]], shufflemask(undef, 0, 1, undef)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
+ %0:_(<2 x s32>) = COPY $d0
+ %1:_(<2 x s32>) = COPY $d1
+ %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(-1,0,1,-1)
+ RET_ReallyLR implicit %2
+...
+
+---
+name: shuffle_vector_scalar
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: shuffle_vector_scalar
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY]](s64), [[COPY]](s64), [[COPY]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s64>)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(<4 x s64>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 0, 0, 0)
+ RET_ReallyLR implicit %2
+...
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
index 99dfac807dcd15..956e220e7efa95 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -201,21 +201,21 @@ define void @matrix_mul_double_shuffle(i32 %N, ptr nocapture %C, ptr nocapture r
; CHECK-GI: // %bb.0: // %vector.header
; CHECK-GI-NEXT: and w9, w3, #0xffff
; CHECK-GI-NEXT: adrp x8, .LCPI2_0
-; CHECK-GI-NEXT: dup v1.4s, w9
+; CHECK-GI-NEXT: dup v0.4s, w9
; CHECK-GI-NEXT: mov w9, w0
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI2_0]
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
; CHECK-GI-NEXT: and x8, x9, #0xfffffff8
; CHECK-GI-NEXT: .LBB2_1: // %vector.body
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-GI-NEXT: ldrh w9, [x2], #16
; CHECK-GI-NEXT: subs x8, x8, #8
-; CHECK-GI-NEXT: mov v0.s[0], w9
+; CHECK-GI-NEXT: mov v2.s[0], w9
; CHECK-GI-NEXT: mov w9, w0
; CHECK-GI-NEXT: add w0, w0, #8
; CHECK-GI-NEXT: lsl x9, x9, #2
-; CHECK-GI-NEXT: tbl v3.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT: mul v3.4s, v1.4s, v3.4s
-; CHECK-GI-NEXT: str q3, [x1, x9]
+; CHECK-GI-NEXT: tbl v2.16b, { v2.16b, v3.16b }, v1.16b
+; CHECK-GI-NEXT: mul v2.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT: str q2, [x1, x9]
; CHECK-GI-NEXT: b.ne .LBB2_1
; CHECK-GI-NEXT: // %bb.2: // %for.end12
; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index 3c4901ade972ec..69a69dbd3b18ba 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -2385,9 +2385,8 @@ define <2 x i32> @do_stuff(<2 x i64> %0, <2 x i64> %1) {
;
; CHECK-GI-LABEL: do_stuff:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
-; CHECK-GI-NEXT: ext v2.16b, v1.16b, v2.16b, #8
+; CHECK-GI-NEXT: mov d2, v1.d[1]
; CHECK-GI-NEXT: umull v0.2d, v2.2s, v0.2s
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
diff --git a/llvm/test/CodeGen/AArch64/ext-narrow-index.ll b/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
index 2c5d33da93c863..177f2cafcf833a 100644
--- a/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
+++ b/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
@@ -17,17 +17,11 @@ entry:
}
define <8 x i8> @i8_off1(<16 x i8> %arg1, <16 x i8> %arg2) {
-; CHECK-SD-LABEL: i8_off1:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #1
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i8_off1:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #1
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: i8_off1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #1
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
ret <8 x i8> %shuffle
@@ -42,8 +36,7 @@ define <8 x i8> @i8_off8(<16 x i8> %arg1, <16 x i8> %arg2) {
;
; CHECK-GISEL-LABEL: i8_off8:
; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #8
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: mov d0, v0.d[1]
; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -90,17 +83,11 @@ entry:
}
define <4 x i16> @i16_off1(<8 x i16> %arg1, <8 x i16> %arg2) {
-; CHECK-SD-LABEL: i16_off1:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #2
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i16_off1:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #2
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: i16_off1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #2
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
ret <4 x i16> %shuffle
@@ -140,17 +127,11 @@ entry:
}
define <2 x i32> @i32_off1(<4 x i32> %arg1, <4 x i32> %arg2) {
-; CHECK-SD-LABEL: i32_off1:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #4
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i32_off1:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #4
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: i32_off1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #4
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 1, i32 2>
ret <2 x i32> %shuffle
@@ -228,18 +209,11 @@ entry:
}
define <8 x i8> @i8_zero_off1(<16 x i8> %arg1) {
-; CHECK-SD-LABEL: i8_zero_off1:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #1
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i8_zero_off1:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
-; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #1
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: i8_zero_off1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #1
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
ret <8 x i8> %shuffle
@@ -254,9 +228,7 @@ define <8 x i8> @i8_zero_off8(<16 x i8> %arg1) {
;
; CHECK-GISEL-LABEL: i8_zero_off8:
; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
-; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #8
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT: mov d0, v0.d[1]
; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -283,8 +255,8 @@ define <8 x i8> @i8_zero_off22(<16 x i8> %arg1) {
;
; CHECK-GISEL-LABEL: i8_zero_off22:
; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
-; CHECK-GISEL-NEXT: ext v0.16b, v1.16b, v0.16b, #6
+; CHECK-GISEL-NEXT: movi v0.2d, #0000000000000000
+; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v0.16b, #6
; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GISEL-NEXT: ret
entry:
@@ -304,18 +276,11 @@ entry:
}
define <4 x i16> @i16_zero_off1(<8 x i16> %arg1) {
-; CHECK-SD-LABEL: i16_zero_off1:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #2
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i16_zero_off1:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
-; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #2
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: i16_zero_off1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #2
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
ret <4 x i16> %shuffle
@@ -355,18 +320,11 @@ entry:
}
define <2 x i32> @i32_zero_off1(<4 x i32> %arg1) {
-; CHECK-SD-LABEL: i32_zero_off1:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #4
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GISEL-LABEL: i32_zero_off1:
-; CHECK-GISEL: // %bb.0: // %entry
-; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
-; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #4
-; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT: ret
+; CHECK-LABEL: i32_zero_off1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #4
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 1, i32 2>
ret <2 x i32> %shuffle
More information about the llvm-commits
mailing list