[llvm] c0d28d5 - [AArch64][SME] SelectSMETileSlice should also match to 'reg+0' when slice is ADD with non-constant RHS.
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 24 07:38:36 PDT 2023
Author: Sander de Smalen
Date: 2023-03-24T14:37:16Z
New Revision: c0d28d58fafe4480a129298efb36120170f35fa0
URL: https://github.com/llvm/llvm-project/commit/c0d28d58fafe4480a129298efb36120170f35fa0
DIFF: https://github.com/llvm/llvm-project/commit/c0d28d58fafe4480a129298efb36120170f35fa0.diff
LOG: [AArch64][SME] SelectSMETileSlice should also match to 'reg+0' when slice is ADD with non-constant RHS.
SelectSMETileSlice would decompose a slice operand into a `reg + 0` when the slice was not an ADD,
but when the slice was an ADD whose RHS was not a constant, it would simply fail to match.
This patch fixes that by falling back to a `reg + 0` slice whenever a `reg + imm` form cannot be matched.
Added:
llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll
Modified:
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 2e95966c5ae19..cc941600481b4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -6656,26 +6656,19 @@ bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
SDValue &Base, SDValue &Offset,
unsigned Scale) {
- if (N.getOpcode() != ISD::ADD) {
- Base = N;
- Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
- return true;
- }
-
- // Process an ADD node.
- const SDValue LHS = N.getOperand(0);
- const SDValue RHS = N.getOperand(1);
-
- if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
- int64_t ImmOff = C->getSExtValue();
-
- if ((ImmOff < 0 || ImmOff > MaxSize) || (ImmOff % Scale != 0))
- return false;
-
- Base = LHS;
- Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
- return true;
- }
+ // Try to untangle an ADD node into a 'reg + offset'
+ if (N.getOpcode() == ISD::ADD)
+ if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int64_t ImmOff = C->getSExtValue();
+ if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))) {
+ Base = N.getOperand(0);
+ Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
+ return true;
+ }
+ }
- return false;
+ // By default, just match reg + 0.
+ Base = N;
+ Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
+ return true;
}
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll
new file mode 100644
index 0000000000000..f41791e626f5f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64"
+
+define <vscale x 2 x i64> @test_tileslice_no_add(i32 %idx) #0 {
+; CHECK-LABEL: test_tileslice_no_add:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1
+; CHECK-NEXT: ret
+entry:
+ %read = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 %idx)
+ %read.ext = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %read, 0
+ ret <vscale x 2 x i64> %read.ext
+}
+
+define <vscale x 2 x i64> @test_tileslice_add_nonconstant(i32 %idx1, i32 %idx2) #0 {
+; CHECK-LABEL: test_tileslice_add_nonconstant:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1
+; CHECK-NEXT: ret
+entry:
+ %add = add i32 %idx1, %idx2
+ %read = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 %add)
+ %read.ext = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %read, 0
+ ret <vscale x 2 x i64> %read.ext
+}
+
+declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32)
+
+attributes #0 = { nounwind "target-features"="+sme2" }
More information about the llvm-commits mailing list