[llvm] 5922a04 - [AArch64][SVE2p1] Make use of REVD instruction.
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 6 07:43:04 PST 2022
Author: Sander de Smalen
Date: 2022-12-06T15:42:32Z
New Revision: 5922a04dbd665f56de562dda0ac4a482df15fc4f
URL: https://github.com/llvm/llvm-project/commit/5922a04dbd665f56de562dda0ac4a482df15fc4f
DIFF: https://github.com/llvm/llvm-project/commit/5922a04dbd665f56de562dda0ac4a482df15fc4f.diff
LOG: [AArch64][SVE2p1] Make use of REVD instruction.
Reversing double-words within a quad-word is possible using the REVD instruction
when SVE2p1 is enabled.
Reviewed By: paulwalker-arm
Differential Revision: https://reviews.llvm.org/D139119
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9fcee1c9ae0e..2ad7d8e6763a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10647,13 +10647,11 @@ static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.)
static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
- assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
- "Only possible block sizes for REV are: 16, 32, 64");
+ assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64 ||
+ BlockSize == 128) &&
+ "Only possible block sizes for REV are: 16, 32, 64, 128");
unsigned EltSz = VT.getScalarSizeInBits();
- if (EltSz == 64)
- return false;
-
unsigned NumElts = VT.getVectorNumElements();
unsigned BlockElts = M[0] + 1;
// If the first shuffle index is UNDEF, be optimistic.
@@ -23255,6 +23253,18 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
}
}
+ if (Subtarget->hasSVE2p1() && VT.getScalarSizeInBits() == 64 &&
+ isREVMask(ShuffleMask, VT, 128)) {
+ if (!VT.isFloatingPoint())
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::REVD_MERGE_PASSTHRU);
+
+ EVT NewVT = getPackedSVEVectorVT(EVT::getIntegerVT(*DAG.getContext(), 64));
+ Op = DAG.getNode(ISD::BITCAST, DL, NewVT, Op1);
+ Op = LowerToPredicatedOp(Op, DAG, AArch64ISD::REVD_MERGE_PASSTHRU);
+ Op = DAG.getNode(ISD::BITCAST, DL, ContainerVT, Op);
+ return convertFromScalableVector(DAG, VT, Op);
+ }
+
unsigned WhichResult;
if (isZIPMask(ShuffleMask, VT, WhichResult) && WhichResult == 0)
return convertFromScalableVector(
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll
index 49b9b90396db..a64e6bfec573 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll
@@ -223,6 +223,37 @@ define void @test_rev_elts_fail(ptr %a) #1 {
ret void
}
+; This is the same test as above, but with sve2p1 it can use the REVD instruction to reverse
+; the double-words within quad-words.
+define void @test_revdv4i64_sve2p1(ptr %a) #2 {
+; CHECK-LABEL: test_revdv4i64_sve2p1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d, vl4
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: revd z0.q, p0/m, z0.q
+; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+ %tmp1 = load <4 x i64>, ptr %a
+ %tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ store <4 x i64> %tmp2, ptr %a
+ ret void
+}
+
+define void @test_revdv4f64_sve2p1(ptr %a) #2 {
+; CHECK-LABEL: test_revdv4f64_sve2p1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d, vl4
+; CHECK-NEXT: ptrue p1.d
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: revd z0.q, p1/m, z0.q
+; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+ %tmp1 = load <4 x double>, ptr %a
+ %tmp2 = shufflevector <4 x double> %tmp1, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ store <4 x double> %tmp2, ptr %a
+ ret void
+}
+
; REV instruction will reverse the order of all elements in the vector.
; When the vector length and the target register size are inconsistent,
; the correctness of generated REV instruction for shuffle pattern cannot be guaranteed.
@@ -472,3 +503,4 @@ define void @test_revv8i16v8i16(ptr %a, ptr %b, ptr %c) #1 {
attributes #0 = { "target-features"="+sve" }
attributes #1 = { "target-features"="+sve" vscale_range(2,2) }
+attributes #2 = { "target-features"="+sve2p1" }
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
index cf378adc68e9..961c17f5a24e 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
@@ -203,6 +203,38 @@ define void @test_rev_elts_fail(ptr %a) #0 {
ret void
}
+; This is the same test as above, but with sve2p1 it can use the REVD instruction to reverse
+; the double-words within quad-words.
+define void @test_revdv4i64_sve2p1(ptr %a) #1 {
+; CHECK-LABEL: test_revdv4i64_sve2p1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: revd z0.q, p0/m, z0.q
+; CHECK-NEXT: revd z1.q, p0/m, z1.q
+; CHECK-NEXT: stp q0, q1, [x0]
+; CHECK-NEXT: ret
+ %tmp1 = load <4 x i64>, ptr %a
+ %tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ store <4 x i64> %tmp2, ptr %a
+ ret void
+}
+
+define void @test_revdv4f64_sve2p1(ptr %a) #1 {
+; CHECK-LABEL: test_revdv4f64_sve2p1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: revd z0.q, p0/m, z0.q
+; CHECK-NEXT: revd z1.q, p0/m, z1.q
+; CHECK-NEXT: stp q0, q1, [x0]
+; CHECK-NEXT: ret
+ %tmp1 = load <4 x double>, ptr %a
+ %tmp2 = shufflevector <4 x double> %tmp1, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ store <4 x double> %tmp2, ptr %a
+ ret void
+}
+
define void @test_revv8i32(ptr %a) #0 {
; CHECK-LABEL: test_revv8i32:
; CHECK: // %bb.0:
@@ -238,3 +270,4 @@ define void @test_revv8i32(ptr %a) #0 {
}
attributes #0 = { "target-features"="+sve" }
+attributes #1 = { "target-features"="+sve2p1" }
More information about the llvm-commits
mailing list