[llvm] a88e837 - [SVE] Add more gather/scatter tests to highlight bugs in their generated code.
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 7 09:15:12 PDT 2022
Author: Paul Walker
Date: 2022-04-07T17:13:48+01:00
New Revision: a88e8374db3d6a0ede8a456cbfe6d5ffdc5ae8f9
URL: https://github.com/llvm/llvm-project/commit/a88e8374db3d6a0ede8a456cbfe6d5ffdc5ae8f9
DIFF: https://github.com/llvm/llvm-project/commit/a88e8374db3d6a0ede8a456cbfe6d5ffdc5ae8f9.diff
LOG: [SVE] Add more gather/scatter tests to highlight bugs in their generated code.
Added:
Modified:
llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll b/llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll
index 7f2d4e6ea464e..d06cc313ba539 100644
--- a/llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll
@@ -374,6 +374,46 @@ define <vscale x 2 x i64> @masked_gather_nxv2i64_null_with__vec_plus_imm_offsets
ret <vscale x 2 x i64> %data
}
+; TODO: The generated code is wrong because we've lost the sign extension that
+; defines bits offsets[8:31].
+define <vscale x 4 x i32> @masked_gather_nxv4i32_s8_offsets(i32* %base, <vscale x 4 x i8> %offsets, <vscale x 4 x i1> %mask) #0 {
+; CHECK-LABEL: masked_gather_nxv4i32_s8_offsets:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
+; CHECK-NEXT: ret
+ %offsets.sext = sext <vscale x 4 x i8> %offsets to <vscale x 4 x i32>
+ %ptrs = getelementptr i32, i32* %base, <vscale x 4 x i32> %offsets.sext
+ %data = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
+ ret <vscale x 4 x i32> %data
+}
+
+define <vscale x 4 x i32> @masked_gather_nxv4i32_u8_offsets(i32* %base, <vscale x 4 x i8> %offsets, <vscale x 4 x i1> %mask) #0 {
+; CHECK-LABEL: masked_gather_nxv4i32_u8_offsets:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and z0.s, z0.s, #0xff
+; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw #2]
+; CHECK-NEXT: ret
+ %offsets.zext = zext <vscale x 4 x i8> %offsets to <vscale x 4 x i32>
+ %ptrs = getelementptr i32, i32* %base, <vscale x 4 x i32> %offsets.zext
+ %data = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
+ ret <vscale x 4 x i32> %data
+}
+
+; TODO: The generated code is wrong because we've lost the sign extension that
+; defines bits offsets[8:31], and we're also replicating offsets[31] across
+; offsets[32:63] even though the IR has explicitly zeroed those bits.
+define <vscale x 4 x i32> @masked_gather_nxv4i32_u32s8_offsets(i32* %base, <vscale x 4 x i8> %offsets, <vscale x 4 x i1> %mask) #0 {
+; CHECK-LABEL: masked_gather_nxv4i32_u32s8_offsets:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
+; CHECK-NEXT: ret
+ %offsets.sext = sext <vscale x 4 x i8> %offsets to <vscale x 4 x i32>
+ %offsets.sext.zext = zext <vscale x 4 x i32> %offsets.sext to <vscale x 4 x i64>
+ %ptrs = getelementptr i32, i32* %base, <vscale x 4 x i64> %offsets.sext.zext
+ %data = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
+ ret <vscale x 4 x i32> %data
+}
+
define void @masked_scatter_nxv2i64_const_with_vec_offsets(<vscale x 2 x i64> %vector_offsets, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %data) #0 {
; CHECK-LABEL: masked_scatter_nxv2i64_const_with_vec_offsets:
; CHECK: // %bb.0:
@@ -416,15 +456,57 @@ define void @masked_scatter_nxv2i64_null_with__vec_plus_imm_offsets(<vscale x 2
ret void
}
+; TODO: The generated code is wrong because we've lost the sign extension that
+; defines bits offsets[8:31].
+define void @masked_scatter_nxv4i32_s8_offsets(i32* %base, <vscale x 4 x i8> %offsets, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %data) #0 {
+; CHECK-LABEL: masked_scatter_nxv4i32_s8_offsets:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1w { z1.s }, p0, [x0, z0.s, sxtw #2]
+; CHECK-NEXT: ret
+ %offsets.sext = sext <vscale x 4 x i8> %offsets to <vscale x 4 x i32>
+ %ptrs = getelementptr i32, i32* %base, <vscale x 4 x i32> %offsets.sext
+ call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> %mask)
+ ret void
+}
+
+define void @masked_scatter_nxv4i32_u8_offsets(i32* %base, <vscale x 4 x i8> %offsets, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %data) #0 {
+; CHECK-LABEL: masked_scatter_nxv4i32_u8_offsets:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and z0.s, z0.s, #0xff
+; CHECK-NEXT: st1w { z1.s }, p0, [x0, z0.s, uxtw #2]
+; CHECK-NEXT: ret
+ %offsets.zext = zext <vscale x 4 x i8> %offsets to <vscale x 4 x i32>
+ %ptrs = getelementptr i32, i32* %base, <vscale x 4 x i32> %offsets.zext
+ call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> %mask)
+ ret void
+}
+
+; TODO: The generated code is wrong because we've lost the sign extension that
+; defines bits offsets[8:31], and we're also replicating offsets[31] across
+; offsets[32:63] even though the IR has explicitly zeroed those bits.
+define void @masked_scatter_nxv4i32_u32s8_offsets(i32* %base, <vscale x 4 x i8> %offsets, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %data) #0 {
+; CHECK-LABEL: masked_scatter_nxv4i32_u32s8_offsets:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1w { z1.s }, p0, [x0, z0.s, sxtw #2]
+; CHECK-NEXT: ret
+ %offsets.sext = sext <vscale x 4 x i8> %offsets to <vscale x 4 x i32>
+ %offsets.sext.zext = zext <vscale x 4 x i32> %offsets.sext to <vscale x 4 x i64>
+ %ptrs = getelementptr i32, i32* %base, <vscale x 4 x i64> %offsets.sext.zext
+ call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> %mask)
+ ret void
+}
+
attributes #0 = { "target-features"="+sve" vscale_range(1, 16) }
-declare <vscale x 4 x i8> @llvm.masked.gather.nxv4i8(<vscale x 4 x i8*>, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)
declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <vscale x 4 x i8> @llvm.masked.gather.nxv4i8(<vscale x 4 x i8*>, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)
+declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*>, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x float*>, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8*>, i32, <vscale x 4 x i1>)
declare void @llvm.masked.scatter.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16*>, i32, <vscale x 4 x i1>)
-declare void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32*>, i32, <vscale x 4 x i1>)
declare void @llvm.masked.scatter.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half*>, i32, <vscale x 4 x i1>)
declare <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
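
For context on the first pair of TODOs (the _s8_offsets tests): the i8 offsets
occupy only bits [0:7] of each 32-bit lane, so bits [8:31] are undefined until
the sext is materialised. For example, an offset of -1 should address base - 4,
but the emitted sxtw-form gather consumes whatever happens to sit in the upper
lane bits. A minimal sketch of code that would be correct, assuming a merging
SXTB under an all-true predicate defines the missing bits (illustrative only,
not a sequence proposed by the commit):

    ptrue p1.s                                  // all-true governing predicate
    sxtb  z0.s, p1/m, z0.s                      // define lane bits [8:31] from bit 7
    ld1w  { z0.s }, p0/z, [x0, z0.s, sxtw #2]   // the sxtw form is now safe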
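
Similarly, in the _u32s8_offsets tests the IR zero-extends an already
sign-extended i32 value to i64, so offset bits [32:63] must be zero, yet the
emitted sxtw form replicates bit 31 into them: an i8 offset of -1 denotes
base + 4 * 0xFFFFFFFF, not base - 4. One plausible correct sequence, again
assuming an explicit SXTB and switching to the uxtw addressing form (a sketch,
not the commit's proposed fix):

    ptrue p1.s                                  // all-true governing predicate
    sxtb  z0.s, p1/m, z0.s                      // materialise the i8 -> i32 sext
    ld1w  { z0.s }, p0/z, [x0, z0.s, uxtw #2]   // uxtw matches the IR's zext to i64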