[llvm] [RISCV] Combine (mul (zext, zext)) -> (zext (mul (zext, zext))) (PR #86465)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 24 22:13:20 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Luke Lau (lukel97)
<details>
<summary>Changes</summary>
Building on #<!-- -->86248, we can also narrow the width of a mul of zexts.
This is specifically legal because on RVV we always extend to the next power-of-2 width, and the product of two N-bit integers always fits in 2\*N bits.
So as long as the inner zext widens to 2\*N bits, the narrower multiply has enough room for the result and cannot overflow.
Alive2 proof: https://alive2.llvm.org/ce/z/XteYyb
---
Full diff: https://github.com/llvm/llvm-project/pull/86465.diff
3 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+4)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll (+4-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll (+56-72)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4ace50aa477a9d..4f371fae398396 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -12915,6 +12915,7 @@ static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
// add (zext, zext) -> zext (add (zext, zext))
// sub (zext, zext) -> sext (sub (zext, zext))
+// mul (zext, zext) -> zext (mul (zext, zext))
//
// where the sum of the extend widths match, and the the range of the bin op
// fits inside the width of the narrower bin op. (For profitability on rvv, we
@@ -13360,6 +13361,9 @@ static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
}
+ if (SDValue V = combineBinOpOfZExt(N, DAG))
+ return V;
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll
index bff7ef86c28960..b97c9654ad3cb4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll
@@ -391,12 +391,12 @@ define <32 x i64> @vwmulu_v32i64(ptr %x, ptr %y) {
define <2 x i32> @vwmulu_v2i32_v2i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v2i32_v2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vle8.v v9, (a1)
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf2 v11, v9
-; CHECK-NEXT: vwmulu.vv v8, v10, v11
+; CHECK-NEXT: vwmulu.vv v10, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vzext.vf2 v8, v10
; CHECK-NEXT: ret
%a = load <2 x i8>, ptr %x
%b = load <2 x i8>, ptr %y
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll
index 539a4bdb27ad59..28fc53f37ba1d3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll
@@ -355,10 +355,10 @@ define <vscale x 1 x i64> @vwmul_vv_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, <vsc
define <vscale x 1 x i64> @vwmulu_vv_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb) {
; CHECK-LABEL: vwmulu_vv_nxv1i64_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf2 v11, v9
-; CHECK-NEXT: vwmulu.vv v8, v10, v11
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vwmulu.vv v10, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT: vzext.vf2 v8, v10
; CHECK-NEXT: ret
%vc = zext <vscale x 1 x i16> %va to <vscale x 1 x i64>
%vd = zext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
@@ -402,11 +402,9 @@ define <vscale x 1 x i64> @vwmulu_vx_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, i16
; CHECK-LABEL: vwmulu_vx_nxv1i64_nxv1i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf2 v11, v9
-; CHECK-NEXT: vwmulu.vv v8, v10, v11
+; CHECK-NEXT: vwmulu.vx v9, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT: vzext.vf2 v8, v9
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i16> undef, i16 %b, i16 0
%splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
@@ -451,10 +449,10 @@ define <vscale x 2 x i64> @vwmul_vv_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, <vsc
define <vscale x 2 x i64> @vwmulu_vv_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) {
; CHECK-LABEL: vwmulu_vv_nxv2i64_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf2 v11, v9
-; CHECK-NEXT: vwmulu.vv v8, v10, v11
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vwmulu.vv v10, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vzext.vf2 v8, v10
; CHECK-NEXT: ret
%vc = zext <vscale x 2 x i16> %va to <vscale x 2 x i64>
%vd = zext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
@@ -498,11 +496,9 @@ define <vscale x 2 x i64> @vwmulu_vx_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, i16
; CHECK-LABEL: vwmulu_vx_nxv2i64_nxv2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf2 v11, v9
-; CHECK-NEXT: vwmulu.vv v8, v10, v11
+; CHECK-NEXT: vwmulu.vx v10, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vzext.vf2 v8, v10
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i16> undef, i16 %b, i16 0
%splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
@@ -547,10 +543,10 @@ define <vscale x 4 x i64> @vwmul_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vsc
define <vscale x 4 x i64> @vwmulu_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb) {
; CHECK-LABEL: vwmulu_vv_nxv4i64_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v12, v8
-; CHECK-NEXT: vzext.vf2 v14, v9
-; CHECK-NEXT: vwmulu.vv v8, v12, v14
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vwmulu.vv v12, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vzext.vf2 v8, v12
; CHECK-NEXT: ret
%vc = zext <vscale x 4 x i16> %va to <vscale x 4 x i64>
%vd = zext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
@@ -594,11 +590,9 @@ define <vscale x 4 x i64> @vwmulu_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i16
; CHECK-LABEL: vwmulu_vx_nxv4i64_nxv4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v12, v8
-; CHECK-NEXT: vzext.vf2 v14, v9
-; CHECK-NEXT: vwmulu.vv v8, v12, v14
+; CHECK-NEXT: vwmulu.vx v12, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vzext.vf2 v8, v12
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i16> undef, i16 %b, i16 0
%splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
@@ -643,10 +637,10 @@ define <vscale x 8 x i64> @vwmul_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vsc
define <vscale x 8 x i64> @vwmulu_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb) {
; CHECK-LABEL: vwmulu_vv_nxv8i64_nxv8i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vzext.vf2 v16, v8
-; CHECK-NEXT: vzext.vf2 v20, v10
-; CHECK-NEXT: vwmulu.vv v8, v16, v20
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vwmulu.vv v16, v8, v10
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT: vzext.vf2 v8, v16
; CHECK-NEXT: ret
%vc = zext <vscale x 8 x i16> %va to <vscale x 8 x i64>
%vd = zext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
@@ -690,11 +684,9 @@ define <vscale x 8 x i64> @vwmulu_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i16
; CHECK-LABEL: vwmulu_vx_nxv8i64_nxv8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vzext.vf2 v16, v8
-; CHECK-NEXT: vzext.vf2 v20, v10
-; CHECK-NEXT: vwmulu.vv v8, v16, v20
+; CHECK-NEXT: vwmulu.vx v16, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT: vzext.vf2 v8, v16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i16> undef, i16 %b, i16 0
%splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
@@ -739,10 +731,10 @@ define <vscale x 1 x i64> @vwmul_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vscal
define <vscale x 1 x i64> @vwmulu_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
; CHECK-LABEL: vwmulu_vv_nxv1i64_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vzext.vf4 v10, v8
-; CHECK-NEXT: vzext.vf4 v11, v9
-; CHECK-NEXT: vwmulu.vv v8, v10, v11
+; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vwmulu.vv v10, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT: vzext.vf4 v8, v10
; CHECK-NEXT: ret
%vc = zext <vscale x 1 x i8> %va to <vscale x 1 x i64>
%vd = zext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
@@ -786,11 +778,9 @@ define <vscale x 1 x i64> @vwmulu_vx_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, i8 %b
; CHECK-LABEL: vwmulu_vx_nxv1i64_nxv1i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vzext.vf4 v10, v8
-; CHECK-NEXT: vzext.vf4 v11, v9
-; CHECK-NEXT: vwmulu.vv v8, v10, v11
+; CHECK-NEXT: vwmulu.vx v9, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT: vzext.vf4 v8, v9
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i8> undef, i8 %b, i8 0
%splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
@@ -835,10 +825,10 @@ define <vscale x 2 x i64> @vwmul_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vscal
define <vscale x 2 x i64> @vwmulu_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
; CHECK-LABEL: vwmulu_vv_nxv2i64_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vzext.vf4 v10, v8
-; CHECK-NEXT: vzext.vf4 v11, v9
-; CHECK-NEXT: vwmulu.vv v8, v10, v11
+; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vwmulu.vv v10, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vzext.vf4 v8, v10
; CHECK-NEXT: ret
%vc = zext <vscale x 2 x i8> %va to <vscale x 2 x i64>
%vd = zext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
@@ -882,11 +872,9 @@ define <vscale x 2 x i64> @vwmulu_vx_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, i8 %b
; CHECK-LABEL: vwmulu_vx_nxv2i64_nxv2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vzext.vf4 v10, v8
-; CHECK-NEXT: vzext.vf4 v11, v9
-; CHECK-NEXT: vwmulu.vv v8, v10, v11
+; CHECK-NEXT: vwmulu.vx v10, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vzext.vf4 v8, v10
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i8> undef, i8 %b, i8 0
%splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
@@ -931,10 +919,10 @@ define <vscale x 4 x i64> @vwmul_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscal
define <vscale x 4 x i64> @vwmulu_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) {
; CHECK-LABEL: vwmulu_vv_nxv4i64_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vzext.vf4 v12, v8
-; CHECK-NEXT: vzext.vf4 v14, v9
-; CHECK-NEXT: vwmulu.vv v8, v12, v14
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vwmulu.vv v12, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vzext.vf4 v8, v12
; CHECK-NEXT: ret
%vc = zext <vscale x 4 x i8> %va to <vscale x 4 x i64>
%vd = zext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
@@ -978,11 +966,9 @@ define <vscale x 4 x i64> @vwmulu_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %b
; CHECK-LABEL: vwmulu_vx_nxv4i64_nxv4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vzext.vf4 v12, v8
-; CHECK-NEXT: vzext.vf4 v14, v9
-; CHECK-NEXT: vwmulu.vv v8, v12, v14
+; CHECK-NEXT: vwmulu.vx v12, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vzext.vf4 v8, v12
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i8> undef, i8 %b, i8 0
%splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
@@ -1027,10 +1013,10 @@ define <vscale x 8 x i64> @vwmul_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscal
define <vscale x 8 x i64> @vwmulu_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
; CHECK-LABEL: vwmulu_vv_nxv8i64_nxv8i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vzext.vf4 v16, v8
-; CHECK-NEXT: vzext.vf4 v20, v9
-; CHECK-NEXT: vwmulu.vv v8, v16, v20
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vwmulu.vv v16, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT: vzext.vf4 v8, v16
; CHECK-NEXT: ret
%vc = zext <vscale x 8 x i8> %va to <vscale x 8 x i64>
%vd = zext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
@@ -1074,11 +1060,9 @@ define <vscale x 8 x i64> @vwmulu_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %b
; CHECK-LABEL: vwmulu_vx_nxv8i64_nxv8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vzext.vf4 v16, v8
-; CHECK-NEXT: vzext.vf4 v20, v9
-; CHECK-NEXT: vwmulu.vv v8, v16, v20
+; CHECK-NEXT: vwmulu.vx v16, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT: vzext.vf4 v8, v16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i8> undef, i8 %b, i8 0
%splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
``````````
</details>
https://github.com/llvm/llvm-project/pull/86465
More information about the llvm-commits mailing list