[llvm] c0d28d5 - [AArch64][SME] SelectSMETileSlice should also match to 'reg+0' when slice is ADD with non-constant RHS.

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 24 07:38:36 PDT 2023


Author: Sander de Smalen
Date: 2023-03-24T14:37:16Z
New Revision: c0d28d58fafe4480a129298efb36120170f35fa0

URL: https://github.com/llvm/llvm-project/commit/c0d28d58fafe4480a129298efb36120170f35fa0
DIFF: https://github.com/llvm/llvm-project/commit/c0d28d58fafe4480a129298efb36120170f35fa0.diff

LOG: [AArch64][SME] SelectSMETileSlice should also match to 'reg+0' when slice is ADD with non-constant RHS.

SelectSMETileSlice would decompose an address into a `reg + 0` when the slice
was not an ADD, but when the slice was an ADD whose RHS was not a constant, it
would simply fail to match.

This patch fixes that by falling back to a `reg + 0` slice whenever the ADD
cannot be untangled into a `reg + offset`.

Added: 
    llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 2e95966c5ae19..cc941600481b4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -6656,26 +6656,19 @@ bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
 bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
                                              SDValue &Base, SDValue &Offset,
                                              unsigned Scale) {
-  if (N.getOpcode() != ISD::ADD) {
-    Base = N;
-    Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
-    return true;
-  }
-
-  // Process an ADD node.
-  const SDValue LHS = N.getOperand(0);
-  const SDValue RHS = N.getOperand(1);
-
-  if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
-    int64_t ImmOff = C->getSExtValue();
-
-    if ((ImmOff < 0 || ImmOff > MaxSize) || (ImmOff % Scale != 0))
-      return false;
-
-    Base = LHS;
-    Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
-    return true;
-  }
+  // Try to untangle an ADD node into a 'reg + offset'
+  if (N.getOpcode() == ISD::ADD)
+    if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+      int64_t ImmOff = C->getSExtValue();
+      if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))) {
+        Base = N.getOperand(0);
+        Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
+        return true;
+      }
+    }
 
-  return false;
+  // By default, just match reg + 0.
+  Base = N;
+  Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
+  return true;
 }

diff  --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll
new file mode 100644
index 0000000000000..f41791e626f5f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64"
+
+define <vscale x 2 x i64> @test_tileslice_no_add(i32 %idx) #0 {
+; CHECK-LABEL: test_tileslice_no_add:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1
+; CHECK-NEXT:    ret
+entry:
+  %read = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 %idx)
+  %read.ext = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %read, 0
+  ret <vscale x 2 x i64> %read.ext
+}
+
+define <vscale x 2 x i64> @test_tileslice_add_nonconstant(i32 %idx1, i32 %idx2) #0 {
+; CHECK-LABEL: test_tileslice_add_nonconstant:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1
+; CHECK-NEXT:    ret
+entry:
+  %add = add i32 %idx1, %idx2
+  %read = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 %add)
+  %read.ext = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %read, 0
+  ret <vscale x 2 x i64> %read.ext
+}
+
+declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32)
+
+attributes #0 = { nounwind "target-features"="+sme2" }


        


More information about the llvm-commits mailing list