[llvm] 5e407f0 - [SVE] Add gather/scatter tests to highlight bugs in their generated code.
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 6 07:32:29 PDT 2022
Author: Paul Walker
Date: 2022-04-06T15:30:29+01:00
New Revision: 5e407f0887bf8173018ed7ec1348277ff4e853d7
URL: https://github.com/llvm/llvm-project/commit/5e407f0887bf8173018ed7ec1348277ff4e853d7
DIFF: https://github.com/llvm/llvm-project/commit/5e407f0887bf8173018ed7ec1348277ff4e853d7.diff
LOG: [SVE] Add gather/scatter tests to highlight bugs in their generated code.
Added:
Modified:
llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll b/llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll
index e894291845d41..7f2d4e6ea464e 100644
--- a/llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll
@@ -331,12 +331,100 @@ define void @scatter_f16_index_add_add_mul([8 x half]* %base, i64 %offset, i64 %
call void @llvm.masked.scatter.nxv4f16(<vscale x 4 x half> %data, <vscale x 4 x half*> %gep.bc, i32 2, <vscale x 4 x i1> %pg)
ret void
}
+
+define <vscale x 2 x i64> @masked_gather_nxv2i64_const_with_vec_offsets(<vscale x 2 x i64> %vector_offsets, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: masked_gather_nxv2i64_const_with_vec_offsets:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #8
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, z0.d, lsl #3]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr i64, i64* inttoptr (i64 8 to i64*), <vscale x 2 x i64> %vector_offsets
+ %data = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %pg, <vscale x 2 x i64> undef)
+ ret <vscale x 2 x i64> %data
+}
+
+; TODO: The generated code is wrong because we've lost the scaling applied to
+; %scalar_offset when it's used to calculate %ptrs.
+define <vscale x 2 x i64> @masked_gather_nxv2i64_null_with_vec_plus_scalar_offsets(<vscale x 2 x i64> %vector_offsets, i64 %scalar_offset, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: masked_gather_nxv2i64_null_with_vec_plus_scalar_offsets:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, z0.d, lsl #3]
+; CHECK-NEXT: ret
+ %scalar_offset.ins = insertelement <vscale x 2 x i64> undef, i64 %scalar_offset, i64 0
+ %scalar_offset.splat = shufflevector <vscale x 2 x i64> %scalar_offset.ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %offsets = add <vscale x 2 x i64> %vector_offsets, %scalar_offset.splat
+ %ptrs = getelementptr i64, i64* null, <vscale x 2 x i64> %offsets
+ %data = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %pg, <vscale x 2 x i64> undef)
+ ret <vscale x 2 x i64> %data
+}
+
+; TODO: The generated code is wrong because we've lost the scaling applied to
+; the constant scalar offset (i.e. i64 1) when it's used to calculate %ptrs.
+define <vscale x 2 x i64> @masked_gather_nxv2i64_null_with__vec_plus_imm_offsets(<vscale x 2 x i64> %vector_offsets, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: masked_gather_nxv2i64_null_with__vec_plus_imm_offsets:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #1
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, z0.d, lsl #3]
+; CHECK-NEXT: ret
+ %scalar_offset.ins = insertelement <vscale x 2 x i64> undef, i64 1, i64 0
+ %scalar_offset.splat = shufflevector <vscale x 2 x i64> %scalar_offset.ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %offsets = add <vscale x 2 x i64> %vector_offsets, %scalar_offset.splat
+ %ptrs = getelementptr i64, i64* null, <vscale x 2 x i64> %offsets
+ %data = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %pg, <vscale x 2 x i64> undef)
+ ret <vscale x 2 x i64> %data
+}
+
+define void @masked_scatter_nxv2i64_const_with_vec_offsets(<vscale x 2 x i64> %vector_offsets, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %data) #0 {
+; CHECK-LABEL: masked_scatter_nxv2i64_const_with_vec_offsets:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #8
+; CHECK-NEXT: st1d { z1.d }, p0, [x8, z0.d, lsl #3]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr i64, i64* inttoptr (i64 8 to i64*), <vscale x 2 x i64> %vector_offsets
+ call void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %pg)
+ ret void
+}
+
+; TODO: The generated code is wrong because we've lost the scaling applied to
+; %scalar_offset when it's used to calculate %ptrs.
+define void @masked_scatter_nxv2i64_null_with_vec_plus_scalar_offsets(<vscale x 2 x i64> %vector_offsets, i64 %scalar_offset, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %data) #0 {
+; CHECK-LABEL: masked_scatter_nxv2i64_null_with_vec_plus_scalar_offsets:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1d { z1.d }, p0, [x0, z0.d, lsl #3]
+; CHECK-NEXT: ret
+ %scalar_offset.ins = insertelement <vscale x 2 x i64> undef, i64 %scalar_offset, i64 0
+ %scalar_offset.splat = shufflevector <vscale x 2 x i64> %scalar_offset.ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %offsets = add <vscale x 2 x i64> %vector_offsets, %scalar_offset.splat
+ %ptrs = getelementptr i64, i64* null, <vscale x 2 x i64> %offsets
+ call void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %pg)
+ ret void
+}
+
+; TODO: The generated code is wrong because we've lost the scaling applied to
+; the constant scalar offset (i.e. i64 1) when it's used to calculate %ptrs.
+define void @masked_scatter_nxv2i64_null_with__vec_plus_imm_offsets(<vscale x 2 x i64> %vector_offsets, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %data) #0 {
+; CHECK-LABEL: masked_scatter_nxv2i64_null_with__vec_plus_imm_offsets:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #1
+; CHECK-NEXT: st1d { z1.d }, p0, [x8, z0.d, lsl #3]
+; CHECK-NEXT: ret
+ %scalar_offset.ins = insertelement <vscale x 2 x i64> undef, i64 1, i64 0
+ %scalar_offset.splat = shufflevector <vscale x 2 x i64> %scalar_offset.ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %offsets = add <vscale x 2 x i64> %vector_offsets, %scalar_offset.splat
+ %ptrs = getelementptr i64, i64* null, <vscale x 2 x i64> %offsets
+ call void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %pg)
+ ret void
+}
+
attributes #0 = { "target-features"="+sve" vscale_range(1, 16) }
+declare <vscale x 4 x i8> @llvm.masked.gather.nxv4i8(<vscale x 4 x i8*>, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)
+declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x float*>, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
-declare <vscale x 4 x i8> @llvm.masked.gather.nxv4i8(<vscale x 4 x i8*>, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)
declare void @llvm.masked.scatter.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8*>, i32, <vscale x 4 x i1>)
declare void @llvm.masked.scatter.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16*>, i32, <vscale x 4 x i1>)
+declare void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half*>, i32, <vscale x 4 x i1>)
+
declare <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
More information about the llvm-commits mailing list