[llvm] 199c6ec - [RISCV] Add coverage for missed scalarization of gather/scatter base pointers
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 30 08:43:12 PDT 2025
Author: Philip Reames
Date: 2025-06-30T08:42:52-07:00
New Revision: 199c6ecb5cd0a90f5981b8c8a43470bc48265899
URL: https://github.com/llvm/llvm-project/commit/199c6ecb5cd0a90f5981b8c8a43470bc48265899
DIFF: https://github.com/llvm/llvm-project/commit/199c6ecb5cd0a90f5981b8c8a43470bc48265899.diff
LOG: [RISCV] Add coverage for missed scalarization of gather/scatter base pointers
Added:
Modified:
llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
Removed:
################################################################################
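The pattern under test: when a gather/scatter address GEP contains a step that is uniform across lanes (a leading scalar index, a splat of a variable, or a trailing uniform index that reassociation could move ahead of the vector index), that uniform contribution could be folded into the scalar base operand of the indexed memory instruction rather than being materialized per lane in vector registers. A minimal sketch of the rewrite, not part of this commit (%scalar.base is a name introduced here purely for illustration):

  ; Before: the uniform prefix is computed inside the vector GEP, so the
  ; gather sees a full vector of absolute addresses.
  %gep = getelementptr [256 x i32], ptr %base, i32 %index, <4 x i32> %vecidx

  ; After: peel the uniform prefix into a scalar GEP, leaving only the
  ; per-lane offsets for the gather to consume.
  %scalar.base = getelementptr [256 x i32], ptr %base, i32 %index
  %gep = getelementptr i32, ptr %scalar.base, <4 x i32> %vecidx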
diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
index 2757e140ecde5..3057ee9293992 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
@@ -2373,3 +2373,170 @@ define <vscale x 1 x i8> @mgather_baseidx_zext_nxv1i1_nxv1i8(ptr %base, <vscale
%v = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> %ptrs, i32 1, <vscale x 1 x i1> %m, <vscale x 1 x i8> %passthru)
ret <vscale x 1 x i8> %v
}
+
+define <4 x i32> @scalar_prefix(ptr %base, i32 signext %index, <4 x i32> %vecidx) {
+; RV32-LABEL: scalar_prefix:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
+; RV32-NEXT: vsll.vi v9, v9, 10
+; RV32-NEXT: vadd.vx v9, v9, a0
+; RV32-NEXT: vsll.vi v8, v8, 2
+; RV32-NEXT: vadd.vv v8, v9, v8
+; RV32-NEXT: vluxei32.v v8, (zero), v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: scalar_prefix:
+; RV64: # %bb.0:
+; RV64-NEXT: li a2, 1024
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a0
+; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64-NEXT: vmv.v.x v9, a2
+; RV64-NEXT: vwmaccsu.vx v10, a1, v9
+; RV64-NEXT: li a0, 4
+; RV64-NEXT: vwmaccus.vx v10, a0, v8
+; RV64-NEXT: vluxei64.v v8, (zero), v10
+; RV64-NEXT: ret
+ %gep = getelementptr [256 x i32], ptr %base, i32 %index, <4 x i32> %vecidx
+ %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @scalar_prefix_with_splat(ptr %base, i32 %index, <4 x i32> %vecidx) {
+; RV32-LABEL: scalar_prefix_with_splat:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
+; RV32-NEXT: vsll.vi v9, v9, 10
+; RV32-NEXT: vadd.vx v9, v9, a0
+; RV32-NEXT: vsll.vi v8, v8, 2
+; RV32-NEXT: vadd.vv v8, v9, v8
+; RV32-NEXT: vluxei32.v v8, (zero), v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: scalar_prefix_with_splat:
+; RV64: # %bb.0:
+; RV64-NEXT: li a2, 1024
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a0
+; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64-NEXT: vmv.v.x v9, a2
+; RV64-NEXT: vwmaccsu.vx v10, a1, v9
+; RV64-NEXT: li a0, 4
+; RV64-NEXT: vwmaccus.vx v10, a0, v8
+; RV64-NEXT: vluxei64.v v8, (zero), v10
+; RV64-NEXT: ret
+ %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %index, i32 0
+ %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
+
+ %gep = getelementptr [256 x i32], ptr %base, <4 x i32> %broadcast.splat, <4 x i32> %vecidx
+ %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @scalar_prefix_with_constant_splat(ptr %base, <4 x i32> %vecidx) {
+; RV32-LABEL: scalar_prefix_with_constant_splat:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vsll.vi v8, v8, 2
+; RV32-NEXT: vluxei32.v v8, (a0), v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: scalar_prefix_with_constant_splat:
+; RV64: # %bb.0:
+; RV64-NEXT: li a1, 4
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vwmulsu.vx v10, v8, a1
+; RV64-NEXT: lui a1, 5
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: vluxei64.v v8, (a0), v10
+; RV64-NEXT: ret
+ %gep = getelementptr [256 x i32], ptr %base, <4 x i32> splat (i32 20), <4 x i32> %vecidx
+ %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @reassociate(ptr %base, i32 %index, <4 x i32> %vecidx) {
+; RV32-LABEL: reassociate:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vsll.vi v8, v8, 10
+; RV32-NEXT: vmv.v.x v9, a1
+; RV32-NEXT: vadd.vx v8, v8, a0
+; RV32-NEXT: vsll.vi v9, v9, 2
+; RV32-NEXT: vadd.vv v8, v8, v9
+; RV32-NEXT: vluxei32.v v8, (zero), v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reassociate:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a0
+; RV64-NEXT: li a0, 1024
+; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64-NEXT: vwmaccus.vx v10, a0, v8
+; RV64-NEXT: vmv.v.i v8, 4
+; RV64-NEXT: vwmaccsu.vx v10, a1, v8
+; RV64-NEXT: vluxei64.v v8, (zero), v10
+; RV64-NEXT: ret
+ %gep = getelementptr [256 x i32], ptr %base, <4 x i32> %vecidx, i32 %index
+ %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @reassociate_with_splat(ptr %base, i32 %index, <4 x i32> %vecidx) {
+; RV32-LABEL: reassociate_with_splat:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
+; RV32-NEXT: vsll.vi v8, v8, 10
+; RV32-NEXT: vadd.vx v8, v8, a0
+; RV32-NEXT: vsll.vi v9, v9, 2
+; RV32-NEXT: vadd.vv v8, v8, v9
+; RV32-NEXT: vluxei32.v v8, (zero), v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reassociate_with_splat:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a0
+; RV64-NEXT: li a0, 1024
+; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64-NEXT: vwmaccus.vx v10, a0, v8
+; RV64-NEXT: vmv.v.i v8, 4
+; RV64-NEXT: vwmaccsu.vx v10, a1, v8
+; RV64-NEXT: vluxei64.v v8, (zero), v10
+; RV64-NEXT: ret
+ %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %index, i32 0
+ %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
+
+ %gep = getelementptr [256 x i32], ptr %base, <4 x i32> %vecidx, <4 x i32> %broadcast.splat
+ %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @reassociate_with_constant_splat(ptr %base, i32 %index, <4 x i32> %vecidx) {
+; RV32-LABEL: reassociate_with_constant_splat:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vsll.vi v8, v8, 10
+; RV32-NEXT: addi a0, a0, 80
+; RV32-NEXT: vluxei32.v v8, (a0), v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reassociate_with_constant_splat:
+; RV64: # %bb.0:
+; RV64-NEXT: li a1, 1024
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vwmulsu.vx v10, v8, a1
+; RV64-NEXT: addi a0, a0, 80
+; RV64-NEXT: vluxei64.v v8, (a0), v10
+; RV64-NEXT: ret
+ %gep = getelementptr [256 x i32], ptr %base, <4 x i32> %vecidx, <4 x i32> splat (i32 20)
+ %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+ ret <4 x i32> %res
+}
+
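Note the asymmetry in the generated checks: the constant-splat variants (scalar_prefix_with_constant_splat, reassociate_with_constant_splat) already fold the uniform offset into a scalar base register, so their vluxei instructions index off (a0). The variants with a variable scalar or a variable splat instead broadcast the uniform value, do the address arithmetic in vector registers, and gather from a (zero) base; those are the missed-scalarization cases the commit title refers to.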