[llvm] [RISCV] Match (ext (op a, b)) to (wop a, b) (PR #137508)
Pengcheng Wang via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 27 01:34:20 PDT 2025
https://github.com/wangpc-pp created https://github.com/llvm/llvm-project/pull/137508
These suboptimal cases were found when trying to optimize the ABDS/ABDU
operations.
Adding ISel patterns is the simplest way to optimize them. We could
instead add DAGCombine cases for `ISD::SIGN_EXTEND`/`ISD::ZERO_EXTEND`,
but that would require a lot of manual handling.
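
For reference, below is a minimal reproducer distilled from the
llvm/test/CodeGen/RISCV/rvv/abd.ll changes in this patch (the function
name is illustrative). With these patterns, `llc -mtriple=riscv64
-mattr=+v` should select a single `vwsubu.vv` for the zero-extended
subtract instead of `vsub.vv` followed by `vzext.vf2`:

  declare <vscale x 8 x i8> @llvm.umax.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
  declare <vscale x 8 x i8> @llvm.umin.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)

  ; (zext (sub (umax a, b), (umin a, b))) now matches the widening pattern.
  define <vscale x 8 x i16> @uabd_reproducer(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
    %max = call <vscale x 8 x i8> @llvm.umax.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b)
    %min = call <vscale x 8 x i8> @llvm.umin.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b)
    %sub = sub <vscale x 8 x i8> %max, %min
    %res = zext <vscale x 8 x i8> %sub to <vscale x 8 x i16>
    ret <vscale x 8 x i16> %res
  }

Since the unsigned max is never smaller than the unsigned min, the
narrow subtract cannot wrap, so widening it to `vwsubu.vv` preserves
the result.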
From 7f686e0ea7a161f258d22ec93bfdc5b2fa7110ac Mon Sep 17 00:00:00 2001
From: Pengcheng Wang <wangpengcheng.pp at bytedance.com>
Date: Sun, 27 Apr 2025 16:25:23 +0800
Subject: [PATCH] [RISCV] Match (ext (op a, b)) to (wop a, b)
These suboptimal cases were found when trying to optimize the ABDS/ABDU
operations.
Adding ISel patterns is the simplest way to optimize them. We could
instead add DAGCombine cases for `ISD::SIGN_EXTEND`/`ISD::ZERO_EXTEND`,
but that would require a lot of manual handling.
---
.../Target/RISCV/RISCVInstrInfoVSDPatterns.td | 11 +++++
llvm/test/CodeGen/RISCV/rvv/abd.ll | 44 +++++++------------
2 files changed, 26 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index aea125c5348dd..55a5109b7ecfa 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -467,6 +467,17 @@ multiclass VPatWidenBinarySDNode_VV_VX<SDNode op, PatFrags extop1, PatFrags exto
(!cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX)
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
GPR:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>;
+ if !eq(extop1, extop2) then
+ def : Pat<(wti.Vector (extop1 (op (vti.Vector vti.RegClass:$rs2),
+ (vti.Vector vti.RegClass:$rs1)))),
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
+ (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
+ vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>;
+ def : Pat<(wti.Vector (extop1 (op (vti.Vector vti.RegClass:$rs2),
+ (vti.Vector (SplatPat (XLenVT GPR:$rs1)))))),
+ (!cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX)
+ (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
+ GPR:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>;
}
}
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/abd.ll b/llvm/test/CodeGen/RISCV/rvv/abd.ll
index 583d872238df7..b8d95bd95df8a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/abd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/abd.ll
@@ -58,10 +58,8 @@ define <vscale x 8 x i16> @sabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmin.vv v10, v8, v9
-; CHECK-NEXT: vmax.vv v8, v8, v9
-; CHECK-NEXT: vsub.vv v10, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v8, v10
+; CHECK-NEXT: vmax.vv v11, v8, v9
+; CHECK-NEXT: vwsubu.vv v8, v11, v10
; CHECK-NEXT: ret
%a.sext = sext <vscale x 8 x i8> %a to <vscale x 8 x i16>
%b.sext = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
@@ -91,10 +89,8 @@ define <vscale x 4 x i32> @sabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmin.vv v10, v8, v9
-; CHECK-NEXT: vmax.vv v8, v8, v9
-; CHECK-NEXT: vsub.vv v10, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v8, v10
+; CHECK-NEXT: vmax.vv v11, v8, v9
+; CHECK-NEXT: vwsubu.vv v8, v11, v10
; CHECK-NEXT: ret
%a.sext = sext <vscale x 4 x i16> %a to <vscale x 4 x i32>
%b.sext = sext <vscale x 4 x i16> %b to <vscale x 4 x i32>
@@ -124,10 +120,8 @@ define <vscale x 2 x i64> @sabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmin.vv v10, v8, v9
-; CHECK-NEXT: vmax.vv v8, v8, v9
-; CHECK-NEXT: vsub.vv v10, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v8, v10
+; CHECK-NEXT: vmax.vv v11, v8, v9
+; CHECK-NEXT: vwsubu.vv v8, v11, v10
; CHECK-NEXT: ret
%a.sext = sext <vscale x 2 x i32> %a to <vscale x 2 x i64>
%b.sext = sext <vscale x 2 x i32> %b to <vscale x 2 x i64>
@@ -192,10 +186,8 @@ define <vscale x 8 x i16> @uabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vminu.vv v10, v8, v9
-; CHECK-NEXT: vmaxu.vv v8, v8, v9
-; CHECK-NEXT: vsub.vv v10, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v8, v10
+; CHECK-NEXT: vmaxu.vv v11, v8, v9
+; CHECK-NEXT: vwsubu.vv v8, v11, v10
; CHECK-NEXT: ret
%a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
%b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i16>
@@ -225,10 +217,8 @@ define <vscale x 4 x i32> @uabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vminu.vv v10, v8, v9
-; CHECK-NEXT: vmaxu.vv v8, v8, v9
-; CHECK-NEXT: vsub.vv v10, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v8, v10
+; CHECK-NEXT: vmaxu.vv v11, v8, v9
+; CHECK-NEXT: vwsubu.vv v8, v11, v10
; CHECK-NEXT: ret
%a.zext = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
%b.zext = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>
@@ -258,10 +248,8 @@ define <vscale x 2 x i64> @uabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vminu.vv v10, v8, v9
-; CHECK-NEXT: vmaxu.vv v8, v8, v9
-; CHECK-NEXT: vsub.vv v10, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v8, v10
+; CHECK-NEXT: vmaxu.vv v11, v8, v9
+; CHECK-NEXT: vwsubu.vv v8, v11, v10
; CHECK-NEXT: ret
%a.zext = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
%b.zext = zext <vscale x 2 x i32> %b to <vscale x 2 x i64>
@@ -296,11 +284,9 @@ define <vscale x 4 x i32> @uabd_non_matching_promoted_ops(<vscale x 4 x i8> %a,
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vminu.vv v8, v10, v9
-; CHECK-NEXT: vmaxu.vv v9, v10, v9
-; CHECK-NEXT: vsub.vv v10, v9, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v8, v10
+; CHECK-NEXT: vminu.vv v11, v10, v9
+; CHECK-NEXT: vmaxu.vv v10, v10, v9
+; CHECK-NEXT: vwsubu.vv v8, v10, v11
; CHECK-NEXT: ret
%a.zext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
%b.zext = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>