[llvm] 59588f0 - [SVE][ISel] Ensure explicit gather/scatter offset extension isn't lost.
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 29 06:23:52 PDT 2022
Author: Paul Walker
Date: 2022-04-29T14:20:13+01:00
New Revision: 59588f0a3d47e3e366d675b8f9724c10a6222c0e
URL: https://github.com/llvm/llvm-project/commit/59588f0a3d47e3e366d675b8f9724c10a6222c0e
DIFF: https://github.com/llvm/llvm-project/commit/59588f0a3d47e3e366d675b8f9724c10a6222c0e.diff
LOG: [SVE][ISel] Ensure explicit gather/scatter offset extension isn't lost.
getGatherScatterIndexIsExtended currently looks through all
SIGN_EXTEND_INREG operations regardless of the type they extend
from. This patch restricts the code to only look through i32->i64
extensions (an nxv2i64 index sign-extended in-register from nxv2i32),
which are the ones supported implicitly by SVE addressing modes.
Differential Revision: https://reviews.llvm.org/D123318
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3f6a36fe49f4..89bbced4c738 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4630,9 +4630,13 @@ unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
}
bool getGatherScatterIndexIsExtended(SDValue Index) {
+ // Ignore non-pointer sized indices.
+ if (Index.getValueType() != MVT::nxv2i64)
+ return false;
+
unsigned Opcode = Index.getOpcode();
if (Opcode == ISD::SIGN_EXTEND_INREG)
- return true;
+ return cast<VTSDNode>(Index.getOperand(1))->getVT() == MVT::nxv2i32;
if (Opcode == ISD::AND) {
SDValue Splat = Index.getOperand(1);
diff --git a/llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll b/llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll
index 4fdf4a106dbc..f4b4a033c343 100644
--- a/llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll
@@ -374,11 +374,11 @@ define <vscale x 2 x i64> @masked_gather_nxv2i64_null_with__vec_plus_imm_offsets
ret <vscale x 2 x i64> %data
}
-; TODO: The generated code is wrong because we've lost the sign extension which
-; defines bits offsets[8:31].
define <vscale x 4 x i32> @masked_gather_nxv4i32_s8_offsets(i32* %base, <vscale x 4 x i8> %offsets, <vscale x 4 x i1> %mask) #0 {
; CHECK-LABEL: masked_gather_nxv4i32_s8_offsets:
; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: sxtb z0.s, p1/m, z0.s
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
; CHECK-NEXT: ret
%offsets.sext = sext <vscale x 4 x i8> %offsets to <vscale x 4 x i32>
@@ -399,12 +399,13 @@ define <vscale x 4 x i32> @masked_gather_nxv4i32_u8_offsets(i32* %base, <vscale
ret <vscale x 4 x i32> %data
}
-; TODO: The generated code is wrong because we've lost the sign extension which
-; defines bits offsets[8:31] and we're also replicating offset[31] across
+; TODO: The generated code is wrong because we're replicating offset[31] across
; offset[32:63] even though the IR has explicitly zero'd those bits.
define <vscale x 4 x i32> @masked_gather_nxv4i32_u32s8_offsets(i32* %base, <vscale x 4 x i8> %offsets, <vscale x 4 x i1> %mask) #0 {
; CHECK-LABEL: masked_gather_nxv4i32_u32s8_offsets:
; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: sxtb z0.s, p1/m, z0.s
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
; CHECK-NEXT: ret
%offsets.sext = sext <vscale x 4 x i8> %offsets to <vscale x 4 x i32>
@@ -456,11 +457,11 @@ define void @masked_scatter_nxv2i64_null_with__vec_plus_imm_offsets(<vscale x 2
ret void
}
-; TODO: The generated code is wrong because we've lost the sign extension which
-; defines bits offsets[8:31].
define void @masked_scatter_nxv4i32_s8_offsets(i32* %base, <vscale x 4 x i8> %offsets, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %data) #0 {
; CHECK-LABEL: masked_scatter_nxv4i32_s8_offsets:
; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: sxtb z0.s, p1/m, z0.s
; CHECK-NEXT: st1w { z1.s }, p0, [x0, z0.s, sxtw #2]
; CHECK-NEXT: ret
%offsets.sext = sext <vscale x 4 x i8> %offsets to <vscale x 4 x i32>
@@ -481,12 +482,13 @@ define void @masked_scatter_nxv4i32_u8_offsets(i32* %base, <vscale x 4 x i8> %of
ret void
}
-; TODO: The generated code is wrong because we've lost the sign extension which
-; defines bits offsets[8:31] and we're also replicating offset[31] across
+; TODO: The generated code is wrong because we're replicating offset[31] across
; offset[32:63] even though the IR has explicitly zero'd those bits.
define void @masked_scatter_nxv4i32_u32s8_offsets(i32* %base, <vscale x 4 x i8> %offsets, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %data) #0 {
; CHECK-LABEL: masked_scatter_nxv4i32_u32s8_offsets:
; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: sxtb z0.s, p1/m, z0.s
; CHECK-NEXT: st1w { z1.s }, p0, [x0, z0.s, sxtw #2]
; CHECK-NEXT: ret
%offsets.sext = sext <vscale x 4 x i8> %offsets to <vscale x 4 x i32>
More information about the llvm-commits
mailing list