[llvm] a7dcedc - [RISCV] Add initial batch of test coverage for zvqdotq codegen
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 22 15:53:21 PDT 2025
Author: Philip Reames
Date: 2025-04-22T15:52:46-07:00
New Revision: a7dcedc5cf73bdeb4e3bb8cd8e32b65349b75b6e
URL: https://github.com/llvm/llvm-project/commit/a7dcedc5cf73bdeb4e3bb8cd8e32b65349b75b6e
DIFF: https://github.com/llvm/llvm-project/commit/a7dcedc5cf73bdeb4e3bb8cd8e32b65349b75b6e.diff
LOG: [RISCV] Add initial batch of test coverage for zvqdotq codegen
This is not complete coverage, but it gives a starting point for working
on codegen for the Zvqdotq extension.
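For context, the common pattern under test is an i8 multiply widened to
i32 feeding an add reduction. Once codegen support lands, the signed
variant could in principle lower to the extension's vqdot.vv instruction,
which accumulates four signed 8-bit products into each 32-bit element.
A hedged sketch of such a lowering for <16 x i8> operands follows; this
is not output from this patch, the instruction name is taken from the
Zvqdotq specification, and the register choices are illustrative:

    vsetivli   zero, 4, e32, m1, ta, ma  # 4 x i32 accumulator lanes cover 16 x i8
    vmv.v.i    v10, 0                    # zero the accumulator
    vqdot.vv   v10, v8, v9               # v10[i] += sum of four signed i8 products
    vmv.s.x    v11, zero
    vredsum.vs v8, v10, v11              # reduce the four partial sums
    vmv.x.s    a0, v8

For now, as the autogenerated checks below show, all four RUN lines
produce the same widening-multiply plus vredsum sequence, since nothing
selects the new instructions yet.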
Added:
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll
new file mode 100644
index 0000000000000..25192ea19aab3
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll
@@ -0,0 +1,211 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvqdotq -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvqdotq -verify-machineinstrs < %s | FileCheck %s
+
+define i32 @vqdot_vv(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vqdot_vv:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT: vsext.vf2 v12, v8
+; CHECK-NEXT: vsext.vf2 v14, v9
+; CHECK-NEXT: vwmul.vv v8, v12, v14
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmv.s.x v12, zero
+; CHECK-NEXT: vredsum.vs v8, v8, v12
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
+entry:
+ %a.sext = sext <16 x i8> %a to <16 x i32>
+ %b.sext = sext <16 x i8> %b to <16 x i32>
+ %mul = mul nuw nsw <16 x i32> %a.sext, %b.sext
+ %res = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
+ ret i32 %res
+}
+
+define i32 @vqdot_vx_constant(<16 x i8> %a) {
+; CHECK-LABEL: vqdot_vx_constant:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT: vsext.vf2 v12, v8
+; CHECK-NEXT: li a0, 23
+; CHECK-NEXT: vwmul.vx v8, v12, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmv.s.x v12, zero
+; CHECK-NEXT: vredsum.vs v8, v8, v12
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
+entry:
+ %a.sext = sext <16 x i8> %a to <16 x i32>
+ %mul = mul nuw nsw <16 x i32> %a.sext, splat (i32 23)
+ %res = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
+ ret i32 %res
+}
+
+define i32 @vqdot_vx_constant_swapped(<16 x i8> %a) {
+; CHECK-LABEL: vqdot_vx_constant_swapped:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT: vsext.vf2 v12, v8
+; CHECK-NEXT: li a0, 23
+; CHECK-NEXT: vwmul.vx v8, v12, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmv.s.x v12, zero
+; CHECK-NEXT: vredsum.vs v8, v8, v12
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
+entry:
+ %a.sext = sext <16 x i8> %a to <16 x i32>
+ %mul = mul nuw nsw <16 x i32> splat (i32 23), %a.sext
+ %res = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
+ ret i32 %res
+}
+
+define i32 @vqdotu_vv(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vqdotu_vv:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vwmulu.vv v10, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vwredsumu.vs v8, v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
+entry:
+ %a.zext = zext <16 x i8> %a to <16 x i32>
+ %b.zext = zext <16 x i8> %b to <16 x i32>
+ %mul = mul nuw nsw <16 x i32> %a.zext, %b.zext
+ %res = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
+ ret i32 %res
+}
+
+define i32 @vqdotu_vx_constant(<16 x i8> %a) {
+; CHECK-LABEL: vqdotu_vx_constant:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT: vzext.vf2 v12, v8
+; CHECK-NEXT: li a0, 123
+; CHECK-NEXT: vwmulu.vx v8, v12, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmv.s.x v12, zero
+; CHECK-NEXT: vredsum.vs v8, v8, v12
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
+entry:
+ %a.zext = zext <16 x i8> %a to <16 x i32>
+ %mul = mul nuw nsw <16 x i32> %a.zext, splat (i32 123)
+ %res = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
+ ret i32 %res
+}
+
+define i32 @vqdotsu_vv(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vqdotsu_vv:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT: vsext.vf2 v12, v8
+; CHECK-NEXT: vzext.vf2 v14, v9
+; CHECK-NEXT: vwmulsu.vv v8, v12, v14
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmv.s.x v12, zero
+; CHECK-NEXT: vredsum.vs v8, v8, v12
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
+entry:
+ %a.sext = sext <16 x i8> %a to <16 x i32>
+ %b.zext = zext <16 x i8> %b to <16 x i32>
+ %mul = mul nuw nsw <16 x i32> %a.sext, %b.zext
+ %res = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
+ ret i32 %res
+}
+
+define i32 @vqdotsu_vv_swapped(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vqdotsu_vv_swapped:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT: vsext.vf2 v12, v8
+; CHECK-NEXT: vzext.vf2 v14, v9
+; CHECK-NEXT: vwmulsu.vv v8, v12, v14
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmv.s.x v12, zero
+; CHECK-NEXT: vredsum.vs v8, v8, v12
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
+entry:
+ %a.sext = sext <16 x i8> %a to <16 x i32>
+ %b.zext = zext <16 x i8> %b to <16 x i32>
+ %mul = mul nuw nsw <16 x i32> %b.zext, %a.sext
+ %res = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
+ ret i32 %res
+}
+
+define i32 @vdotqsu_vx_constant(<16 x i8> %a) {
+; CHECK-LABEL: vdotqsu_vx_constant:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT: vsext.vf2 v12, v8
+; CHECK-NEXT: li a0, 123
+; CHECK-NEXT: vwmul.vx v8, v12, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmv.s.x v12, zero
+; CHECK-NEXT: vredsum.vs v8, v8, v12
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
+entry:
+ %a.sext = sext <16 x i8> %a to <16 x i32>
+ %mul = mul nuw nsw <16 x i32> %a.sext, splat (i32 123)
+ %res = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
+ ret i32 %res
+}
+
+define i32 @vdotqus_vx_constant(<16 x i8> %a) {
+; CHECK-LABEL: vdotqus_vx_constant:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT: vzext.vf2 v12, v8
+; CHECK-NEXT: li a0, -23
+; CHECK-NEXT: vmv.v.x v14, a0
+; CHECK-NEXT: vwmulsu.vv v8, v14, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmv.s.x v12, zero
+; CHECK-NEXT: vredsum.vs v8, v8, v12
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
+entry:
+ %a.zext = zext <16 x i8> %a to <16 x i32>
+ %mul = mul nuw nsw <16 x i32> %a.zext, splat (i32 -23)
+ %res = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
+ ret i32 %res
+}
+
+define i32 @reduce_of_sext(<16 x i8> %a) {
+; CHECK-LABEL: reduce_of_sext:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT: vsext.vf4 v12, v8
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: vredsum.vs v8, v12, v8
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
+entry:
+ %a.ext = sext <16 x i8> %a to <16 x i32>
+ %res = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a.ext)
+ ret i32 %res
+}
+
+define i32 @reduce_of_zext(<16 x i8> %a) {
+; CHECK-LABEL: reduce_of_zext:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT: vzext.vf4 v12, v8
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: vredsum.vs v8, v12, v8
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
+entry:
+ %a.ext = zext <16 x i8> %a to <16 x i32>
+ %res = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a.ext)
+ ret i32 %res
+}