[llvm] 5922a04 - [AArch64][SVE2p1] Make use of REVD instruction.

Tue Dec 6 07:43:04 PST 2022

Author: Sander de Smalen
Date: 2022-12-06T15:42:32Z
New Revision: 5922a04dbd665f56de562dda0ac4a482df15fc4f

URL: https://github.com/llvm/llvm-project/commit/5922a04dbd665f56de562dda0ac4a482df15fc4f
DIFF: https://github.com/llvm/llvm-project/commit/5922a04dbd665f56de562dda0ac4a482df15fc4f.diff

LOG: [AArch64][SVE2p1] Make use of REVD instruction.

Reversing the double-words within each quad-word is possible using the REVD
instruction when SVE2p1 is enabled.

Reviewed By: paulwalker-arm

Differential Revision: https://reviews.llvm.org/D139119

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9fcee1c9ae0e..2ad7d8e6763a 100644

--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10647,13 +10647,11 @@ static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
 /// instruction with the specified blocksize.  (The order of the elements
 /// within each block of the vector is reversed.)
 static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
-  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
-         "Only possible block sizes for REV are: 16, 32, 64");
+  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64 ||
+          BlockSize == 128) &&
+         "Only possible block sizes for REV are: 16, 32, 64, 128");
 
   unsigned EltSz = VT.getScalarSizeInBits();
-  if (EltSz == 64)
-    return false;
-
   unsigned NumElts = VT.getVectorNumElements();
   unsigned BlockElts = M[0] + 1;
   // If the first shuffle index is UNDEF, be optimistic.
@@ -23255,6 +23253,18 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
     }
   }
 
+  if (Subtarget->hasSVE2p1() && VT.getScalarSizeInBits() == 64 &&
+      isREVMask(ShuffleMask, VT, 128)) {
+    if (!VT.isFloatingPoint())
+      return LowerToPredicatedOp(Op, DAG, AArch64ISD::REVD_MERGE_PASSTHRU);
+
+    EVT NewVT = getPackedSVEVectorVT(EVT::getIntegerVT(*DAG.getContext(), 64));
+    Op = DAG.getNode(ISD::BITCAST, DL, NewVT, Op1);
+    Op = LowerToPredicatedOp(Op, DAG, AArch64ISD::REVD_MERGE_PASSTHRU);
+    Op = DAG.getNode(ISD::BITCAST, DL, ContainerVT, Op);
+    return convertFromScalableVector(DAG, VT, Op);
+  }
+
   unsigned WhichResult;
   if (isZIPMask(ShuffleMask, VT, WhichResult) && WhichResult == 0)
     return convertFromScalableVector(

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll
index 49b9b90396db..a64e6bfec573 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll
@@ -223,6 +223,37 @@ define void @test_rev_elts_fail(ptr %a) #1 {
   ret void
 }
 
+; This is the same test as above, but with sve2p1 it can use the REVD instruction to reverse
+; the double-words within quad-words.
+define void @test_revdv4i64_sve2p1(ptr %a) #2 {
+; CHECK-LABEL: test_revdv4i64_sve2p1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d, vl4
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    revd z0.q, p0/m, z0.q
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
+  %tmp1 = load <4 x i64>, ptr %a
+  %tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  store <4 x i64> %tmp2, ptr %a
+  ret void
+}
+
+define void @test_revdv4f64_sve2p1(ptr %a) #2 {
+; CHECK-LABEL: test_revdv4f64_sve2p1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d, vl4
+; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    revd z0.q, p1/m, z0.q
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
+  %tmp1 = load <4 x double>, ptr %a
+  %tmp2 = shufflevector <4 x double> %tmp1, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  store <4 x double> %tmp2, ptr %a
+  ret void
+}
+
 ; REV instruction will reverse the order of all elements in the vector.
 ; When the vector length and the target register size are inconsistent,
 ; the correctness of generated REV instruction for shuffle pattern cannot be guaranteed.
@@ -472,3 +503,4 @@ define void @test_revv8i16v8i16(ptr %a, ptr %b, ptr %c) #1 {
 
 attributes #0 = { "target-features"="+sve" }
 attributes #1 = { "target-features"="+sve" vscale_range(2,2) }
+attributes #2 = { "target-features"="+sve2p1" }

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
index cf378adc68e9..961c17f5a24e 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
@@ -203,6 +203,38 @@ define void @test_rev_elts_fail(ptr %a) #0 {
   ret void
 }
 
+; This is the same test as above, but with sve2p1 it can use the REVD instruction to reverse
+; the double-words within quad-words.
+define void @test_revdv4i64_sve2p1(ptr %a) #1 {
+; CHECK-LABEL: test_revdv4i64_sve2p1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    revd z0.q, p0/m, z0.q
+; CHECK-NEXT:    revd z1.q, p0/m, z1.q
+; CHECK-NEXT:    stp q0, q1, [x0]
+; CHECK-NEXT:    ret
+  %tmp1 = load <4 x i64>, ptr %a
+  %tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  store <4 x i64> %tmp2, ptr %a
+  ret void
+}
+
+define void @test_revdv4f64_sve2p1(ptr %a) #1 {
+; CHECK-LABEL: test_revdv4f64_sve2p1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    revd z0.q, p0/m, z0.q
+; CHECK-NEXT:    revd z1.q, p0/m, z1.q
+; CHECK-NEXT:    stp q0, q1, [x0]
+; CHECK-NEXT:    ret
+  %tmp1 = load <4 x double>, ptr %a
+  %tmp2 = shufflevector <4 x double> %tmp1, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  store <4 x double> %tmp2, ptr %a
+  ret void
+}
+
 define void @test_revv8i32(ptr %a) #0 {
 ; CHECK-LABEL: test_revv8i32:
 ; CHECK:       // %bb.0:
@@ -238,3 +270,4 @@ define void @test_revv8i32(ptr %a) #0 {
 }
 
 attributes #0 = { "target-features"="+sve" }
+attributes #1 = { "target-features"="+sve2p1" }