[llvm] 3a51eed - [AArch64][GlobalISel] Legalize G_SHUFFLE_VECTOR with smaller dest size
Vladislav Dzhidzhoev via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 27 14:47:06 PST 2023
Author: Vladislav Dzhidzhoev
Date: 2023-02-27T23:46:44+01:00
New Revision: 3a51eed94846bee4b1e0ad1f160e1de6d5a7a9d0
URL: https://github.com/llvm/llvm-project/commit/3a51eed94846bee4b1e0ad1f160e1de6d5a7a9d0
DIFF: https://github.com/llvm/llvm-project/commit/3a51eed94846bee4b1e0ad1f160e1de6d5a7a9d0.diff
LOG: [AArch64][GlobalISel] Legalize G_SHUFFLE_VECTOR with smaller dest size
Legalize a G_SHUFFLE_VECTOR whose destination vector has fewer elements
than its source vectors by widening the destination to the source vector
type and padding the shuffle mask with undef elements.
Differential Revision: https://reviews.llvm.org/D144670
Added:
Modified:
llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
llvm/test/CodeGen/AArch64/arm64-vabs.ll
llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index 43c123d1a6666..f43390cb777dd 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -328,6 +328,9 @@ class LegalizerHelper {
unsigned TypeIdx,
LLT NarrowTy);
+ /// Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
+ LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI);
+
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx,
LLT NarrowTy);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 7aa868ab97480..e68fa70862c19 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4978,9 +4978,8 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
}
}
-/// Expand source vectors to the size of destination vector.
-static LegalizerHelper::LegalizeResult
-equalizeVectorShuffleLengths(MachineInstr &MI, MachineIRBuilder &MIRBuilder) {
+LegalizerHelper::LegalizeResult
+LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
@@ -4991,10 +4990,24 @@ equalizeVectorShuffleLengths(MachineInstr &MI, MachineIRBuilder &MIRBuilder) {
Register DstReg = MI.getOperand(0).getReg();
LLT DestEltTy = DstTy.getElementType();
- // TODO: Normalize the shuffle vector since mask and vector length don't
- // match.
- if (MaskNumElts <= SrcNumElts) {
- return LegalizerHelper::LegalizeResult::UnableToLegalize;
+ if (MaskNumElts == SrcNumElts)
+ return Legalized;
+
+ if (MaskNumElts < SrcNumElts) {
+ // Extend mask to match new destination vector size with
+ // undef values.
+ SmallVector<int, 16> NewMask(Mask);
+ for (unsigned I = MaskNumElts; I < SrcNumElts; ++I)
+ NewMask.push_back(-1);
+
+ moreElementsVectorDst(MI, SrcTy, 0);
+ MIRBuilder.setInstrAndDebugLoc(MI);
+ MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
+ MI.getOperand(1).getReg(),
+ MI.getOperand(2).getReg(), NewMask);
+ MI.eraseFromParent();
+
+ return Legalized;
}
unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
@@ -5055,8 +5068,8 @@ LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
unsigned WidenNumElts = MoreTy.getNumElements();
if (DstTy.isVector() && Src1Ty.isVector() &&
- DstTy.getNumElements() > Src1Ty.getNumElements()) {
- return equalizeVectorShuffleLengths(MI, MIRBuilder);
+ DstTy.getNumElements() != Src1Ty.getNumElements()) {
+ return equalizeVectorShuffleLengths(MI);
}
if (TypeIdx != 0)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 5feeff780d775..86f72d657b763 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -730,7 +730,14 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
changeTo(1, 0))
.moreElementsToNextPow2(0)
.clampNumElements(0, v4s32, v4s32)
- .clampNumElements(0, v2s64, v2s64);
+ .clampNumElements(0, v2s64, v2s64)
+ .moreElementsIf(
+ [](const LegalityQuery &Query) {
+ return Query.Types[0].isVector() && Query.Types[1].isVector() &&
+ Query.Types[0].getNumElements() <
+ Query.Types[1].getNumElements();
+ },
+ changeTo(0, 1));
getActionDefinitionsBuilder(G_CONCAT_VECTORS)
.legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}});
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
index 64c1f84ae6386..864275664882c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
@@ -460,3 +460,144 @@ body: |
RET_ReallyLR
...
+---
+name: shuffle_v3i32_v4i32
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $q0, $q1
+
+ ; CHECK-LABEL: name: shuffle_v3i32_v4i32
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(0, 1, 4, undef)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
+ ; CHECK-NEXT: $w0 = COPY [[UV]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<4 x s32>) = COPY $q1
+ %2:_(<3 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 1, 4)
+ %3:_(s32), %4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %2(<3 x s32>)
+ $w0 = COPY %3(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: shuffle_v2i32_v4i32
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $q0, $q1
+
+ ; CHECK-LABEL: name: shuffle_v2i32_v4i32
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(0, 4, undef, undef)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
+ ; CHECK-NEXT: $d0 = COPY [[UV]](<2 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<4 x s32>) = COPY $q1
+ %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 4)
+ $d0 = COPY %2(<2 x s32>)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: shuffle_v4i16_v8i16
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $q0, $q1
+
+ ; CHECK-LABEL: name: shuffle_v4i16_v8i16
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<8 x s16>), [[COPY1]], shufflemask(15, 14, 13, 4, undef, undef, undef, undef)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[SHUF]](<8 x s16>)
+ ; CHECK-NEXT: $d0 = COPY [[UV]](<4 x s16>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %0:_(<8 x s16>) = COPY $q0
+ %1:_(<8 x s16>) = COPY $q1
+ %2:_(<4 x s16>) = G_SHUFFLE_VECTOR %0(<8 x s16>), %1, shufflemask(15, 14, 13, 4)
+ $d0 = COPY %2(<4 x s16>)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: shuffle_v8i8_v16i8
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $q0, $q1
+
+ ; CHECK-LABEL: name: shuffle_v8i8_v16i8
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s8>) = G_SHUFFLE_VECTOR [[COPY]](<16 x s8>), [[COPY1]], shufflemask(7, 13, 6, 4, 17, 3, 0, 0, undef, undef, undef, undef, undef, undef, undef, undef)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s8>), [[UV1:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[SHUF]](<16 x s8>)
+ ; CHECK-NEXT: $d0 = COPY [[UV]](<8 x s8>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %0:_(<16 x s8>) = COPY $q0
+ %1:_(<16 x s8>) = COPY $q1
+ %2:_(<8 x s8>) = G_SHUFFLE_VECTOR %0(<16 x s8>), %1, shufflemask(7, 13, 6, 4, 17, 3, 0, 0)
+ $d0 = COPY %2(<8 x s8>)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: size_shuffle_v4i32_v6i32
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $s0, $s1, $s2, $s3, $s4, $s5
+
+ ; CHECK-LABEL: name: size_shuffle_v4i32_v6i32
+ ; CHECK: liveins: $s0, $s1, $s2, $s3, $s4, $s5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $s1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $s2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $s3
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $s4
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $s5
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[DEF]](s32), [[DEF]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<4 x s32>), [[C]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s64)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<4 x s32>), [[C2]](s64)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY [[C1]](s64)
+ ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s64)
+ ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32)
+ ; CHECK-NEXT: $q0 = COPY [[BUILD_VECTOR3]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %3:_(s32) = COPY $s0
+ %4:_(s32) = COPY $s1
+ %5:_(s32) = COPY $s2
+ %6:_(s32) = COPY $s3
+ %7:_(s32) = COPY $s4
+ %8:_(s32) = COPY $s5
+ %0:_(<6 x s32>) = G_BUILD_VECTOR %3(s32), %4(s32), %5(s32), %6(s32), %7(s32), %8(s32)
+ %19:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<6 x s32>), %0, shufflemask(3, 4, 7, 0)
+ $q0 = COPY %19(<4 x s32>)
+ RET_ReallyLR implicit $q0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index 8860c24a5d505..30599595bfbb0 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -47,13 +47,23 @@ define <2 x i64> @sabdl2d(ptr %A, ptr %B) nounwind {
ret <2 x i64> %tmp4
}
+; FALLBACK-NOT: remark:{{.*}} sabdl2_8h
define <8 x i16> @sabdl2_8h(ptr %A, ptr %B) nounwind {
-; CHECK-LABEL: sabdl2_8h:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0, #8]
-; CHECK-NEXT: ldr d1, [x1, #8]
-; CHECK-NEXT: sabdl.8h v0, v0, v1
-; CHECK-NEXT: ret
+; DAG-LABEL: sabdl2_8h:
+; DAG: // %bb.0:
+; DAG-NEXT: ldr d0, [x0, #8]
+; DAG-NEXT: ldr d1, [x1, #8]
+; DAG-NEXT: sabdl.8h v0, v0, v1
+; DAG-NEXT: ret
+;
+; GISEL-LABEL: sabdl2_8h:
+; GISEL: // %bb.0:
+; GISEL-NEXT: ldr q0, [x0]
+; GISEL-NEXT: ldr q1, [x1]
+; GISEL-NEXT: ext.16b v0, v0, v0, #8
+; GISEL-NEXT: ext.16b v1, v1, v0, #8
+; GISEL-NEXT: sabdl.8h v0, v0, v1
+; GISEL-NEXT: ret
%load1 = load <16 x i8>, ptr %A
%load2 = load <16 x i8>, ptr %B
%tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -63,13 +73,23 @@ define <8 x i16> @sabdl2_8h(ptr %A, ptr %B) nounwind {
ret <8 x i16> %tmp4
}
+; FALLBACK-NOT: remark:{{.*}} sabdl2_4s
define <4 x i32> @sabdl2_4s(ptr %A, ptr %B) nounwind {
-; CHECK-LABEL: sabdl2_4s:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0, #8]
-; CHECK-NEXT: ldr d1, [x1, #8]
-; CHECK-NEXT: sabdl.4s v0, v0, v1
-; CHECK-NEXT: ret
+; DAG-LABEL: sabdl2_4s:
+; DAG: // %bb.0:
+; DAG-NEXT: ldr d0, [x0, #8]
+; DAG-NEXT: ldr d1, [x1, #8]
+; DAG-NEXT: sabdl.4s v0, v0, v1
+; DAG-NEXT: ret
+;
+; GISEL-LABEL: sabdl2_4s:
+; GISEL: // %bb.0:
+; GISEL-NEXT: ldr q0, [x0]
+; GISEL-NEXT: ldr q1, [x1]
+; GISEL-NEXT: ext.16b v0, v0, v0, #8
+; GISEL-NEXT: ext.16b v1, v1, v0, #8
+; GISEL-NEXT: sabdl.4s v0, v0, v1
+; GISEL-NEXT: ret
%load1 = load <8 x i16>, ptr %A
%load2 = load <8 x i16>, ptr %B
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -79,13 +99,23 @@ define <4 x i32> @sabdl2_4s(ptr %A, ptr %B) nounwind {
ret <4 x i32> %tmp4
}
+; FALLBACK-NOT: remark:{{.*}} sabdl2_2d
define <2 x i64> @sabdl2_2d(ptr %A, ptr %B) nounwind {
-; CHECK-LABEL: sabdl2_2d:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0, #8]
-; CHECK-NEXT: ldr d1, [x1, #8]
-; CHECK-NEXT: sabdl.2d v0, v0, v1
-; CHECK-NEXT: ret
+; DAG-LABEL: sabdl2_2d:
+; DAG: // %bb.0:
+; DAG-NEXT: ldr d0, [x0, #8]
+; DAG-NEXT: ldr d1, [x1, #8]
+; DAG-NEXT: sabdl.2d v0, v0, v1
+; DAG-NEXT: ret
+;
+; GISEL-LABEL: sabdl2_2d:
+; GISEL: // %bb.0:
+; GISEL-NEXT: ldr q0, [x0]
+; GISEL-NEXT: ldr q1, [x1]
+; GISEL-NEXT: ext.16b v0, v0, v0, #8
+; GISEL-NEXT: ext.16b v1, v1, v0, #8
+; GISEL-NEXT: sabdl.2d v0, v0, v1
+; GISEL-NEXT: ret
%load1 = load <4 x i32>, ptr %A
%load2 = load <4 x i32>, ptr %B
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -140,13 +170,23 @@ define <2 x i64> @uabdl2d(ptr %A, ptr %B) nounwind {
ret <2 x i64> %tmp4
}
+; FALLBACK-NOT: remark:{{.*}} uabdl2_8h
define <8 x i16> @uabdl2_8h(ptr %A, ptr %B) nounwind {
-; CHECK-LABEL: uabdl2_8h:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0, #8]
-; CHECK-NEXT: ldr d1, [x1, #8]
-; CHECK-NEXT: uabdl.8h v0, v0, v1
-; CHECK-NEXT: ret
+; DAG-LABEL: uabdl2_8h:
+; DAG: // %bb.0:
+; DAG-NEXT: ldr d0, [x0, #8]
+; DAG-NEXT: ldr d1, [x1, #8]
+; DAG-NEXT: uabdl.8h v0, v0, v1
+; DAG-NEXT: ret
+;
+; GISEL-LABEL: uabdl2_8h:
+; GISEL: // %bb.0:
+; GISEL-NEXT: ldr q0, [x0]
+; GISEL-NEXT: ldr q1, [x1]
+; GISEL-NEXT: ext.16b v0, v0, v0, #8
+; GISEL-NEXT: ext.16b v1, v1, v0, #8
+; GISEL-NEXT: uabdl.8h v0, v0, v1
+; GISEL-NEXT: ret
%load1 = load <16 x i8>, ptr %A
%load2 = load <16 x i8>, ptr %B
%tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -157,13 +197,23 @@ define <8 x i16> @uabdl2_8h(ptr %A, ptr %B) nounwind {
ret <8 x i16> %tmp4
}
+; FALLBACK-NOT: remark:{{.*}} uabdl2_4s
define <4 x i32> @uabdl2_4s(ptr %A, ptr %B) nounwind {
-; CHECK-LABEL: uabdl2_4s:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0, #8]
-; CHECK-NEXT: ldr d1, [x1, #8]
-; CHECK-NEXT: uabdl.4s v0, v0, v1
-; CHECK-NEXT: ret
+; DAG-LABEL: uabdl2_4s:
+; DAG: // %bb.0:
+; DAG-NEXT: ldr d0, [x0, #8]
+; DAG-NEXT: ldr d1, [x1, #8]
+; DAG-NEXT: uabdl.4s v0, v0, v1
+; DAG-NEXT: ret
+;
+; GISEL-LABEL: uabdl2_4s:
+; GISEL: // %bb.0:
+; GISEL-NEXT: ldr q0, [x0]
+; GISEL-NEXT: ldr q1, [x1]
+; GISEL-NEXT: ext.16b v0, v0, v0, #8
+; GISEL-NEXT: ext.16b v1, v1, v0, #8
+; GISEL-NEXT: uabdl.4s v0, v0, v1
+; GISEL-NEXT: ret
%load1 = load <8 x i16>, ptr %A
%load2 = load <8 x i16>, ptr %B
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -173,13 +223,23 @@ define <4 x i32> @uabdl2_4s(ptr %A, ptr %B) nounwind {
ret <4 x i32> %tmp4
}
+; FALLBACK-NOT: remark:{{.*}} uabdl2_2d
define <2 x i64> @uabdl2_2d(ptr %A, ptr %B) nounwind {
-; CHECK-LABEL: uabdl2_2d:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0, #8]
-; CHECK-NEXT: ldr d1, [x1, #8]
-; CHECK-NEXT: uabdl.2d v0, v0, v1
-; CHECK-NEXT: ret
+; DAG-LABEL: uabdl2_2d:
+; DAG: // %bb.0:
+; DAG-NEXT: ldr d0, [x0, #8]
+; DAG-NEXT: ldr d1, [x1, #8]
+; DAG-NEXT: uabdl.2d v0, v0, v1
+; DAG-NEXT: ret
+;
+; GISEL-LABEL: uabdl2_2d:
+; GISEL: // %bb.0:
+; GISEL-NEXT: ldr q0, [x0]
+; GISEL-NEXT: ldr q1, [x1]
+; GISEL-NEXT: ext.16b v0, v0, v0, #8
+; GISEL-NEXT: ext.16b v1, v1, v0, #8
+; GISEL-NEXT: uabdl.2d v0, v0, v1
+; GISEL-NEXT: ret
%load1 = load <4 x i32>, ptr %A
%load2 = load <4 x i32>, ptr %B
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -971,14 +1031,25 @@ define <2 x i64> @sabal2d(ptr %A, ptr %B, ptr %C) nounwind {
ret <2 x i64> %tmp5
}
+; FALLBACK-NOT: remark:{{.*}} sabal2_8h
define <8 x i16> @sabal2_8h(ptr %A, ptr %B, ptr %C) nounwind {
-; CHECK-LABEL: sabal2_8h:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x2]
-; CHECK-NEXT: ldr d1, [x0, #8]
-; CHECK-NEXT: ldr d2, [x1, #8]
-; CHECK-NEXT: sabal.8h v0, v1, v2
-; CHECK-NEXT: ret
+; DAG-LABEL: sabal2_8h:
+; DAG: // %bb.0:
+; DAG-NEXT: ldr q0, [x2]
+; DAG-NEXT: ldr d1, [x0, #8]
+; DAG-NEXT: ldr d2, [x1, #8]
+; DAG-NEXT: sabal.8h v0, v1, v2
+; DAG-NEXT: ret
+;
+; GISEL-LABEL: sabal2_8h:
+; GISEL: // %bb.0:
+; GISEL-NEXT: ldr q0, [x0]
+; GISEL-NEXT: ldr q1, [x1]
+; GISEL-NEXT: ext.16b v2, v0, v0, #8
+; GISEL-NEXT: ext.16b v1, v1, v0, #8
+; GISEL-NEXT: ldr q0, [x2]
+; GISEL-NEXT: sabal.8h v0, v2, v1
+; GISEL-NEXT: ret
%load1 = load <16 x i8>, ptr %A
%load2 = load <16 x i8>, ptr %B
%tmp3 = load <8 x i16>, ptr %C
@@ -990,14 +1061,25 @@ define <8 x i16> @sabal2_8h(ptr %A, ptr %B, ptr %C) nounwind {
ret <8 x i16> %tmp5
}
+; FALLBACK-NOT: remark:{{.*}} sabal2_4s
define <4 x i32> @sabal2_4s(ptr %A, ptr %B, ptr %C) nounwind {
-; CHECK-LABEL: sabal2_4s:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x2]
-; CHECK-NEXT: ldr d1, [x0, #8]
-; CHECK-NEXT: ldr d2, [x1, #8]
-; CHECK-NEXT: sabal.4s v0, v1, v2
-; CHECK-NEXT: ret
+; DAG-LABEL: sabal2_4s:
+; DAG: // %bb.0:
+; DAG-NEXT: ldr q0, [x2]
+; DAG-NEXT: ldr d1, [x0, #8]
+; DAG-NEXT: ldr d2, [x1, #8]
+; DAG-NEXT: sabal.4s v0, v1, v2
+; DAG-NEXT: ret
+;
+; GISEL-LABEL: sabal2_4s:
+; GISEL: // %bb.0:
+; GISEL-NEXT: ldr q0, [x0]
+; GISEL-NEXT: ldr q1, [x1]
+; GISEL-NEXT: ext.16b v2, v0, v0, #8
+; GISEL-NEXT: ext.16b v1, v1, v0, #8
+; GISEL-NEXT: ldr q0, [x2]
+; GISEL-NEXT: sabal.4s v0, v2, v1
+; GISEL-NEXT: ret
%load1 = load <8 x i16>, ptr %A
%load2 = load <8 x i16>, ptr %B
%tmp3 = load <4 x i32>, ptr %C
@@ -1009,14 +1091,25 @@ define <4 x i32> @sabal2_4s(ptr %A, ptr %B, ptr %C) nounwind {
ret <4 x i32> %tmp5
}
+; FALLBACK-NOT: remark:{{.*}} sabal2_2d
define <2 x i64> @sabal2_2d(ptr %A, ptr %B, ptr %C) nounwind {
-; CHECK-LABEL: sabal2_2d:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x2]
-; CHECK-NEXT: ldr d1, [x0, #8]
-; CHECK-NEXT: ldr d2, [x1, #8]
-; CHECK-NEXT: sabal.2d v0, v1, v2
-; CHECK-NEXT: ret
+; DAG-LABEL: sabal2_2d:
+; DAG: // %bb.0:
+; DAG-NEXT: ldr q0, [x2]
+; DAG-NEXT: ldr d1, [x0, #8]
+; DAG-NEXT: ldr d2, [x1, #8]
+; DAG-NEXT: sabal.2d v0, v1, v2
+; DAG-NEXT: ret
+;
+; GISEL-LABEL: sabal2_2d:
+; GISEL: // %bb.0:
+; GISEL-NEXT: ldr q0, [x0]
+; GISEL-NEXT: ldr q1, [x1]
+; GISEL-NEXT: ext.16b v2, v0, v0, #8
+; GISEL-NEXT: ext.16b v1, v1, v0, #8
+; GISEL-NEXT: ldr q0, [x2]
+; GISEL-NEXT: sabal.2d v0, v2, v1
+; GISEL-NEXT: ret
%load1 = load <4 x i32>, ptr %A
%load2 = load <4 x i32>, ptr %B
%tmp3 = load <2 x i64>, ptr %C
@@ -1106,14 +1199,25 @@ define <2 x i64> @uabal2d(ptr %A, ptr %B, ptr %C) nounwind {
ret <2 x i64> %tmp5
}
+; FALLBACK-NOT: remark:{{.*}} uabal2_8h
define <8 x i16> @uabal2_8h(ptr %A, ptr %B, ptr %C) nounwind {
-; CHECK-LABEL: uabal2_8h:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x2]
-; CHECK-NEXT: ldr d1, [x0, #8]
-; CHECK-NEXT: ldr d2, [x1, #8]
-; CHECK-NEXT: uabal.8h v0, v1, v2
-; CHECK-NEXT: ret
+; DAG-LABEL: uabal2_8h:
+; DAG: // %bb.0:
+; DAG-NEXT: ldr q0, [x2]
+; DAG-NEXT: ldr d1, [x0, #8]
+; DAG-NEXT: ldr d2, [x1, #8]
+; DAG-NEXT: uabal.8h v0, v1, v2
+; DAG-NEXT: ret
+;
+; GISEL-LABEL: uabal2_8h:
+; GISEL: // %bb.0:
+; GISEL-NEXT: ldr q0, [x0]
+; GISEL-NEXT: ldr q1, [x1]
+; GISEL-NEXT: ext.16b v2, v0, v0, #8
+; GISEL-NEXT: ext.16b v1, v1, v0, #8
+; GISEL-NEXT: ldr q0, [x2]
+; GISEL-NEXT: uabal.8h v0, v2, v1
+; GISEL-NEXT: ret
%load1 = load <16 x i8>, ptr %A
%load2 = load <16 x i8>, ptr %B
%tmp3 = load <8 x i16>, ptr %C
@@ -1125,14 +1229,25 @@ define <8 x i16> @uabal2_8h(ptr %A, ptr %B, ptr %C) nounwind {
ret <8 x i16> %tmp5
}
+; FALLBACK-NOT: remark:{{.*}} uabal2_4s
define <4 x i32> @uabal2_4s(ptr %A, ptr %B, ptr %C) nounwind {
-; CHECK-LABEL: uabal2_4s:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x2]
-; CHECK-NEXT: ldr d1, [x0, #8]
-; CHECK-NEXT: ldr d2, [x1, #8]
-; CHECK-NEXT: uabal.4s v0, v1, v2
-; CHECK-NEXT: ret
+; DAG-LABEL: uabal2_4s:
+; DAG: // %bb.0:
+; DAG-NEXT: ldr q0, [x2]
+; DAG-NEXT: ldr d1, [x0, #8]
+; DAG-NEXT: ldr d2, [x1, #8]
+; DAG-NEXT: uabal.4s v0, v1, v2
+; DAG-NEXT: ret
+;
+; GISEL-LABEL: uabal2_4s:
+; GISEL: // %bb.0:
+; GISEL-NEXT: ldr q0, [x0]
+; GISEL-NEXT: ldr q1, [x1]
+; GISEL-NEXT: ext.16b v2, v0, v0, #8
+; GISEL-NEXT: ext.16b v1, v1, v0, #8
+; GISEL-NEXT: ldr q0, [x2]
+; GISEL-NEXT: uabal.4s v0, v2, v1
+; GISEL-NEXT: ret
%load1 = load <8 x i16>, ptr %A
%load2 = load <8 x i16>, ptr %B
%tmp3 = load <4 x i32>, ptr %C
@@ -1144,14 +1259,25 @@ define <4 x i32> @uabal2_4s(ptr %A, ptr %B, ptr %C) nounwind {
ret <4 x i32> %tmp5
}
+; FALLBACK-NOT: remark:{{.*}} uabal2_2d
define <2 x i64> @uabal2_2d(ptr %A, ptr %B, ptr %C) nounwind {
-; CHECK-LABEL: uabal2_2d:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x2]
-; CHECK-NEXT: ldr d1, [x0, #8]
-; CHECK-NEXT: ldr d2, [x1, #8]
-; CHECK-NEXT: uabal.2d v0, v1, v2
-; CHECK-NEXT: ret
+; DAG-LABEL: uabal2_2d:
+; DAG: // %bb.0:
+; DAG-NEXT: ldr q0, [x2]
+; DAG-NEXT: ldr d1, [x0, #8]
+; DAG-NEXT: ldr d2, [x1, #8]
+; DAG-NEXT: uabal.2d v0, v1, v2
+; DAG-NEXT: ret
+;
+; GISEL-LABEL: uabal2_2d:
+; GISEL: // %bb.0:
+; GISEL-NEXT: ldr q0, [x0]
+; GISEL-NEXT: ldr q1, [x1]
+; GISEL-NEXT: ext.16b v2, v0, v0, #8
+; GISEL-NEXT: ext.16b v1, v1, v0, #8
+; GISEL-NEXT: ldr q0, [x2]
+; GISEL-NEXT: uabal.2d v0, v2, v1
+; GISEL-NEXT: ret
%load1 = load <4 x i32>, ptr %A
%load2 = load <4 x i32>, ptr %B
%tmp3 = load <2 x i64>, ptr %C
@@ -1496,10 +1622,12 @@ define double @fabdd_from_fsub_fabs(double %a, double %b) nounwind {
declare float @llvm.fabs.f32(float) nounwind readnone
declare double @llvm.fabs.f64(double) nounwind readnone
+; FALLBACK-NOT: remark:{{.*}} uabdl_from_extract_dup
define <2 x i64> @uabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
; CHECK-LABEL: uabdl_from_extract_dup:
; CHECK: // %bb.0:
; CHECK-NEXT: dup.2s v1, w0
+; GISEL-NEXT: ext.16b v0, v0, v0, #0
; CHECK-NEXT: uabdl.2d v0, v0, v1
; CHECK-NEXT: ret
%rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
@@ -1512,12 +1640,20 @@ define <2 x i64> @uabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
ret <2 x i64> %res1
}
+; FALLBACK-NOT: remark:{{.*}} uabdl2_from_extract_dup
define <2 x i64> @uabdl2_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
-; CHECK-LABEL: uabdl2_from_extract_dup:
-; CHECK: // %bb.0:
-; CHECK-NEXT: dup.4s v1, w0
-; CHECK-NEXT: uabdl2.2d v0, v0, v1
-; CHECK-NEXT: ret
+; DAG-LABEL: uabdl2_from_extract_dup:
+; DAG: // %bb.0:
+; DAG-NEXT: dup.4s v1, w0
+; DAG-NEXT: uabdl2.2d v0, v0, v1
+; DAG-NEXT: ret
+;
+; GISEL-LABEL: uabdl2_from_extract_dup:
+; GISEL: // %bb.0:
+; GISEL-NEXT: dup.2s v1, w0
+; GISEL-NEXT: ext.16b v0, v0, v0, #8
+; GISEL-NEXT: uabdl.2d v0, v0, v1
+; GISEL-NEXT: ret
%rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
%rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
@@ -1528,10 +1664,12 @@ define <2 x i64> @uabdl2_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
ret <2 x i64> %res1
}
+; FALLBACK-NOT: remark:{{.*}} sabdl_from_extract_dup
define <2 x i64> @sabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
; CHECK-LABEL: sabdl_from_extract_dup:
; CHECK: // %bb.0:
; CHECK-NEXT: dup.2s v1, w0
+; GISEL-NEXT: ext.16b v0, v0, v0, #0
; CHECK-NEXT: sabdl.2d v0, v0, v1
; CHECK-NEXT: ret
%rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
@@ -1544,12 +1682,20 @@ define <2 x i64> @sabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
ret <2 x i64> %res1
}
+; FALLBACK-NOT: remark:{{.*}} sabdl2_from_extract_dup
define <2 x i64> @sabdl2_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
-; CHECK-LABEL: sabdl2_from_extract_dup:
-; CHECK: // %bb.0:
-; CHECK-NEXT: dup.4s v1, w0
-; CHECK-NEXT: sabdl2.2d v0, v0, v1
-; CHECK-NEXT: ret
+; DAG-LABEL: sabdl2_from_extract_dup:
+; DAG: // %bb.0:
+; DAG-NEXT: dup.4s v1, w0
+; DAG-NEXT: sabdl2.2d v0, v0, v1
+; DAG-NEXT: ret
+;
+; GISEL-LABEL: sabdl2_from_extract_dup:
+; GISEL: // %bb.0:
+; GISEL-NEXT: dup.2s v1, w0
+; GISEL-NEXT: ext.16b v0, v0, v0, #8
+; GISEL-NEXT: sabdl.2d v0, v0, v1
+; GISEL-NEXT: ret
%rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
%rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
index 269ffed98a844..f40baef48e719 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
@@ -31,7 +31,8 @@ define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %x) nounwind readnone ss
;
; GISEL-LABEL: test_vcvt_high_f64_f32:
; GISEL: // %bb.0:
-; GISEL-NEXT: fcvtl2 v0.2d, v0.4s
+; GISEL-NEXT: ext.16b v0, v0, v0, #8
+; GISEL-NEXT: fcvtl v0.2d, v0.2s
; GISEL-NEXT: ret
%cvt_in = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> <i32 2, i32 3>
%vcvt1.i = fpext <2 x float> %cvt_in to <2 x double>
@@ -79,7 +80,8 @@ define <2 x double> @test_vcvt_high_v2i32_f32_bitcast(<4 x i32> %x) nounwind rea
;
; GISEL-LABEL: test_vcvt_high_v2i32_f32_bitcast:
; GISEL: // %bb.0:
-; GISEL-NEXT: fcvtl2 v0.2d, v0.4s
+; GISEL-NEXT: ext.16b v0, v0, v0, #8
+; GISEL-NEXT: fcvtl v0.2d, v0.2s
; GISEL-NEXT: ret
%ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%bc2 = bitcast <2 x i32> %ext to <2 x float>
@@ -95,7 +97,8 @@ define <2 x double> @test_vcvt_high_v4i16_f32_bitcast(<8 x i16> %x) nounwind rea
;
; GISEL-LABEL: test_vcvt_high_v4i16_f32_bitcast:
; GISEL: // %bb.0:
-; GISEL-NEXT: fcvtl2 v0.2d, v0.4s
+; GISEL-NEXT: ext.16b v0, v0, v0, #8
+; GISEL-NEXT: fcvtl v0.2d, v0.2s
; GISEL-NEXT: ret
%ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%bc2 = bitcast <4 x i16> %ext to <2 x float>
@@ -111,7 +114,8 @@ define <2 x double> @test_vcvt_high_v8i8_f32_bitcast(<16 x i8> %x) nounwind read
;
; GISEL-LABEL: test_vcvt_high_v8i8_f32_bitcast:
; GISEL: // %bb.0:
-; GISEL-NEXT: fcvtl2 v0.2d, v0.4s
+; GISEL-NEXT: ext.16b v0, v0, v0, #8
+; GISEL-NEXT: fcvtl v0.2d, v0.2s
; GISEL-NEXT: ret
%ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%bc2 = bitcast <8 x i8> %ext to <2 x float>
@@ -143,7 +147,8 @@ define <4 x float> @test_vcvt_high_v2i32_f16_bitcast(<4 x i32> %x) nounwind read
;
; GISEL-LABEL: test_vcvt_high_v2i32_f16_bitcast:
; GISEL: // %bb.0:
-; GISEL-NEXT: fcvtl2 v0.4s, v0.8h
+; GISEL-NEXT: ext.16b v0, v0, v0, #8
+; GISEL-NEXT: fcvtl v0.4s, v0.4h
; GISEL-NEXT: ret
%ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%bc2 = bitcast <2 x i32> %ext to <4 x half>
@@ -159,7 +164,8 @@ define <4 x float> @test_vcvt_high_v4i16_f16_bitcast(<8 x i16> %x) nounwind read
;
; GISEL-LABEL: test_vcvt_high_v4i16_f16_bitcast:
; GISEL: // %bb.0:
-; GISEL-NEXT: fcvtl2 v0.4s, v0.8h
+; GISEL-NEXT: ext.16b v0, v0, v0, #8
+; GISEL-NEXT: fcvtl v0.4s, v0.4h
; GISEL-NEXT: ret
%ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%bc2 = bitcast <4 x i16> %ext to <4 x half>
@@ -175,7 +181,8 @@ define <4 x float> @test_vcvt_high_v8i8_f16_bitcast(<16 x i8> %x) nounwind readn
;
; GISEL-LABEL: test_vcvt_high_v8i8_f16_bitcast:
; GISEL: // %bb.0:
-; GISEL-NEXT: fcvtl2 v0.4s, v0.8h
+; GISEL-NEXT: ext.16b v0, v0, v0, #8
+; GISEL-NEXT: fcvtl v0.4s, v0.4h
; GISEL-NEXT: ret
%ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%bc2 = bitcast <8 x i8> %ext to <4 x half>
More information about the llvm-commits mailing list