[llvm] a047dfe - [AArch64][GISel] Lower EXT of 0 to a COPY
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 16 09:12:21 PDT 2023
Author: David Green
Date: 2023-08-16T17:12:15+01:00
New Revision: a047dfe0d54294973913b5d93967501c9ed15fa5
URL: https://github.com/llvm/llvm-project/commit/a047dfe0d54294973913b5d93967501c9ed15fa5
DIFF: https://github.com/llvm/llvm-project/commit/a047dfe0d54294973913b5d93967501c9ed15fa5.diff
LOG: [AArch64][GISel] Lower EXT of 0 to a COPY
This allows us to select a G_SHUFFLE_VECTOR with an identity mask (possibly
including undef elements) while avoiding the actual EXT instruction when the
shift amount is 0.
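As a minimal illustration (not part of the commit; the function name is
hypothetical), an identity shuffle such as the one below previously matched
the G_EXT pseudo with an immediate of 0 and was selected to an ext
instruction with #0; with this change the post-legalizer lowering builds a
plain COPY instead:

  define <2 x i64> @identity_shuffle(<2 x i64> %a, <2 x i64> %b) {
    ; Mask <0, 1> takes both lanes from %a in order, i.e. an identity shuffle.
    %s = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 1>
    ret <2 x i64> %s
  }

Identity masks that contain undef elements are handled the same way.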
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-splat.mir
llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-zip.mir
llvm/test/CodeGen/AArch64/arm64-dup.ll
llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
llvm/test/CodeGen/AArch64/arm64-vabs.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 09389bda94bc06..d8ee35ebf1e21a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -457,11 +457,15 @@ void applyShuffleVectorPseudo(MachineInstr &MI,
/// for the imported tablegen patterns to work.
void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
MachineIRBuilder MIRBuilder(MI);
- // Tablegen patterns expect an i32 G_CONSTANT as the final op.
- auto Cst =
- MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
- MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
- {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
+ if (MatchInfo.SrcOps[2].getImm() == 0)
+ MIRBuilder.buildCopy(MatchInfo.Dst, MatchInfo.SrcOps[0]);
+ else {
+ // Tablegen patterns expect an i32 G_CONSTANT as the final op.
+ auto Cst =
+ MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
+ MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
+ {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
+ }
MI.eraseFromParent();
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-splat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-splat.mir
index ce5a9ac30d3100..f4374feadcdf3a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-splat.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-splat.mir
@@ -196,9 +196,8 @@ body: |
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s32)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[EXT:%[0-9]+]]:_(<2 x s64>) = G_EXT [[IVEC]], [[DEF]], [[C1]](s32)
- ; CHECK-NEXT: $q0 = COPY [[EXT]](<2 x s64>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY [[IVEC]](<2 x s64>)
+ ; CHECK-NEXT: $q0 = COPY [[COPY1]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s64) = COPY $x0
%2:_(<2 x s64>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-zip.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-zip.mir
index 1e12459771014b..bcf088287f46ae 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-zip.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-zip.mir
@@ -194,10 +194,8 @@ body: |
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[EXT:%[0-9]+]]:_(<2 x s64>) = G_EXT [[COPY]], [[COPY1]], [[C]](s32)
- ; CHECK-NEXT: $q0 = COPY [[EXT]](<2 x s64>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY [[COPY]](<2 x s64>)
+ ; CHECK-NEXT: $q0 = COPY [[COPY1]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<2 x s64>) = COPY $q0
%1:_(<2 x s64>) = COPY $q1
diff --git a/llvm/test/CodeGen/AArch64/arm64-dup.ll b/llvm/test/CodeGen/AArch64/arm64-dup.ll
index 90dbd618919e2b..a6afddfe3f73aa 100644
--- a/llvm/test/CodeGen/AArch64/arm64-dup.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-dup.ll
@@ -555,7 +555,6 @@ define <2 x i32> @dup_const4_ext(<4 x i32> %A) nounwind {
; CHECK-GI-NEXT: adrp x8, .LCPI39_0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI39_0]
; CHECK-GI-NEXT: add.4s v0, v0, v1
-; CHECK-GI-NEXT: ext.16b v0, v0, v0, #0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%tmp1 = add <4 x i32> %A, <i32 8421377, i32 8421377, i32 8421377, i32 8421377>
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll b/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
index 4fd40bb7e229dc..aa048eea302c97 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
@@ -137,9 +137,8 @@ define i32 @addp_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-GI-LABEL: addp_v4i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #0
-; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-GI-NEXT: addp v0.2s, v1.2s, v0.2s
+; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-GI-NEXT: addp v0.2s, v0.2s, v1.2s
; CHECK-GI-NEXT: rev64 v1.2s, v0.2s
; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-GI-NEXT: fmov w0, s0
@@ -165,9 +164,8 @@ define <4 x i16> @addp_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-GI-LABEL: addp_v8i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #0
-; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-GI-NEXT: addp v0.4h, v1.4h, v0.4h
+; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-GI-NEXT: addp v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT: ret
%1 = add <8 x i16> %a, %b
%2 = shufflevector <8 x i16> %1, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -187,9 +185,8 @@ define <8 x i8> @addp_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-GI-LABEL: addp_v16i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #0
-; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-GI-NEXT: addp v0.8b, v1.8b, v0.8b
+; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-GI-NEXT: addp v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%1 = add <16 x i8> %a, %b
%2 = shufflevector <16 x i8> %1, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index 08cb1ef2e0b51e..831acd242221b0 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -1614,18 +1614,11 @@ declare float @llvm.fabs.f32(float) nounwind readnone
declare double @llvm.fabs.f64(double) nounwind readnone
define <2 x i64> @uabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
-; CHECK-SD-LABEL: uabdl_from_extract_dup:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: dup.2s v1, w0
-; CHECK-SD-NEXT: uabdl.2d v0, v0, v1
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: uabdl_from_extract_dup:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: dup.2s v1, w0
-; CHECK-GI-NEXT: ext.16b v0, v0, v0, #0
-; CHECK-GI-NEXT: uabdl.2d v0, v0, v1
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: uabdl_from_extract_dup:
+; CHECK: // %bb.0:
+; CHECK-NEXT: dup.2s v1, w0
+; CHECK-NEXT: uabdl.2d v0, v0, v1
+; CHECK-NEXT: ret
%rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
%rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
%lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
@@ -1656,18 +1649,11 @@ define <2 x i64> @uabdl2_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
}
define <2 x i64> @sabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
-; CHECK-SD-LABEL: sabdl_from_extract_dup:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: dup.2s v1, w0
-; CHECK-SD-NEXT: sabdl.2d v0, v0, v1
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: sabdl_from_extract_dup:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: dup.2s v1, w0
-; CHECK-GI-NEXT: ext.16b v0, v0, v0, #0
-; CHECK-GI-NEXT: sabdl.2d v0, v0, v1
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: sabdl_from_extract_dup:
+; CHECK: // %bb.0:
+; CHECK-NEXT: dup.2s v1, w0
+; CHECK-NEXT: sabdl.2d v0, v0, v1
+; CHECK-NEXT: ret
%rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
%rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
%lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 0, i32 1>