[llvm-branch-commits] [llvm] [LoongArch] Add patterns to support vector type average instructions generation (PR #161079)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sun Sep 28 18:35:36 PDT 2025
https://github.com/zhaoqi5 updated https://github.com/llvm/llvm-project/pull/161079
>From dbdb23407c01756a57fe6c6dbf9bc1e9254a81d3 Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Sun, 28 Sep 2025 20:19:59 +0800
Subject: [PATCH 1/3] [LoongArch] Add patterns to support vector type average
instructions generation
---
.../LoongArch/LoongArchLASXInstrInfo.td | 50 +++++++++++++++++++
.../Target/LoongArch/LoongArchLSXInstrInfo.td | 50 +++++++++++++++++++
2 files changed, 100 insertions(+)
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index adfe990ba1234..6eb68129d9dba 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -2015,6 +2015,56 @@ def : Pat<(v4i32(fp_to_uint v4f64:$vj)),
(XVFTINTRZ_LU_D v4f64:$vj)),
sub_128)>;
+// XVAVG_{B/H/W/D/BU/HU/WU/DU}, XVAVGR_{B/H/W/D/BU/HU/WU/DU}
+def : Pat<(sra (v32i8 (add v32i8:$xj, v32i8:$xk)), (v32i8 (vsplat_imm_eq_1))),
+ (XVAVG_B v32i8:$xj, v32i8:$xk)>;
+def : Pat<(sra (v16i16 (add v16i16:$xj, v16i16:$xk)), (v16i16 (vsplat_imm_eq_1))),
+ (XVAVG_H v16i16:$xj, v16i16:$xk)>;
+def : Pat<(sra (v8i32 (add v8i32:$xj, v8i32:$xk)), (v8i32 (vsplat_imm_eq_1))),
+ (XVAVG_W v8i32:$xj, v8i32:$xk)>;
+def : Pat<(sra (v4i64 (add v4i64:$xj, v4i64:$xk)), (v4i64 (vsplat_imm_eq_1))),
+ (XVAVG_D v4i64:$xj, v4i64:$xk)>;
+def : Pat<(srl (v32i8 (add v32i8:$xj, v32i8:$xk)), (v32i8 (vsplat_imm_eq_1))),
+ (XVAVG_BU v32i8:$xj, v32i8:$xk)>;
+def : Pat<(srl (v16i16 (add v16i16:$xj, v16i16:$xk)), (v16i16 (vsplat_imm_eq_1))),
+ (XVAVG_HU v16i16:$xj, v16i16:$xk)>;
+def : Pat<(srl (v8i32 (add v8i32:$xj, v8i32:$xk)), (v8i32 (vsplat_imm_eq_1))),
+ (XVAVG_WU v8i32:$xj, v8i32:$xk)>;
+def : Pat<(srl (v4i64 (add v4i64:$xj, v4i64:$xk)), (v4i64 (vsplat_imm_eq_1))),
+ (XVAVG_DU v4i64:$xj, v4i64:$xk)>;
+def : Pat<(sra (v32i8 (add (v32i8 (add v32i8:$vj, v32i8:$vk)),
+ (v32i8 (vsplat_imm_eq_1)))),
+ (v32i8 (vsplat_imm_eq_1))),
+ (XVAVGR_B v32i8:$vj, v32i8:$vk)>;
+def : Pat<(sra (v16i16 (add (v16i16 (add v16i16:$vj, v16i16:$vk)),
+ (v16i16 (vsplat_imm_eq_1)))),
+ (v16i16 (vsplat_imm_eq_1))),
+ (XVAVGR_H v16i16:$vj, v16i16:$vk)>;
+def : Pat<(sra (v8i32 (add (v8i32 (add v8i32:$vj, v8i32:$vk)),
+ (v8i32 (vsplat_imm_eq_1)))),
+ (v8i32 (vsplat_imm_eq_1))),
+ (XVAVGR_W v8i32:$vj, v8i32:$vk)>;
+def : Pat<(sra (v4i64 (add (v4i64 (add v4i64:$vj, v4i64:$vk)),
+ (v4i64 (vsplat_imm_eq_1)))),
+ (v4i64 (vsplat_imm_eq_1))),
+ (XVAVGR_D v4i64:$vj, v4i64:$vk)>;
+def : Pat<(srl (v32i8 (add (v32i8 (add v32i8:$vj, v32i8:$vk)),
+ (v32i8 (vsplat_imm_eq_1)))),
+ (v32i8 (vsplat_imm_eq_1))),
+ (XVAVGR_BU v32i8:$vj, v32i8:$vk)>;
+def : Pat<(srl (v16i16 (add (v16i16 (add v16i16:$vj, v16i16:$vk)),
+ (v16i16 (vsplat_imm_eq_1)))),
+ (v16i16 (vsplat_imm_eq_1))),
+ (XVAVGR_HU v16i16:$vj, v16i16:$vk)>;
+def : Pat<(srl (v8i32 (add (v8i32 (add v8i32:$vj, v8i32:$vk)),
+ (v8i32 (vsplat_imm_eq_1)))),
+ (v8i32 (vsplat_imm_eq_1))),
+ (XVAVGR_WU v8i32:$vj, v8i32:$vk)>;
+def : Pat<(srl (v4i64 (add (v4i64 (add v4i64:$vj, v4i64:$vk)),
+ (v4i64 (vsplat_imm_eq_1)))),
+ (v4i64 (vsplat_imm_eq_1))),
+ (XVAVGR_DU v4i64:$vj, v4i64:$vk)>;
+
// XVABSD_{B/H/W/D}[U]
defm : PatXrXr<abds, "XVABSD">;
defm : PatXrXrU<abdu, "XVABSD">;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index b0eb51a92c6c6..169f0d56c223e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -2153,6 +2153,56 @@ def : Pat<(f32 f32imm_vldi:$in),
def : Pat<(f64 f64imm_vldi:$in),
(f64 (EXTRACT_SUBREG (VLDI (to_f64imm_vldi f64imm_vldi:$in)), sub_64))>;
+// VAVG_{B/H/W/D/BU/HU/WU/DU}, VAVGR_{B/H/W/D/BU/HU/WU/DU}
+def : Pat<(sra (v16i8 (add v16i8:$vj, v16i8:$vk)), (v16i8 (vsplat_imm_eq_1))),
+ (VAVG_B v16i8:$vj, v16i8:$vk)>;
+def : Pat<(sra (v8i16 (add v8i16:$vj, v8i16:$vk)), (v8i16 (vsplat_imm_eq_1))),
+ (VAVG_H v8i16:$vj, v8i16:$vk)>;
+def : Pat<(sra (v4i32 (add v4i32:$vj, v4i32:$vk)), (v4i32 (vsplat_imm_eq_1))),
+ (VAVG_W v4i32:$vj, v4i32:$vk)>;
+def : Pat<(sra (v2i64 (add v2i64:$vj, v2i64:$vk)), (v2i64 (vsplat_imm_eq_1))),
+ (VAVG_D v2i64:$vj, v2i64:$vk)>;
+def : Pat<(srl (v16i8 (add v16i8:$vj, v16i8:$vk)), (v16i8 (vsplat_imm_eq_1))),
+ (VAVG_BU v16i8:$vj, v16i8:$vk)>;
+def : Pat<(srl (v8i16 (add v8i16:$vj, v8i16:$vk)), (v8i16 (vsplat_imm_eq_1))),
+ (VAVG_HU v8i16:$vj, v8i16:$vk)>;
+def : Pat<(srl (v4i32 (add v4i32:$vj, v4i32:$vk)), (v4i32 (vsplat_imm_eq_1))),
+ (VAVG_WU v4i32:$vj, v4i32:$vk)>;
+def : Pat<(srl (v2i64 (add v2i64:$vj, v2i64:$vk)), (v2i64 (vsplat_imm_eq_1))),
+ (VAVG_DU v2i64:$vj, v2i64:$vk)>;
+def : Pat<(sra (v16i8 (add (v16i8 (add v16i8:$vj, v16i8:$vk)),
+ (v16i8 (vsplat_imm_eq_1)))),
+ (v16i8 (vsplat_imm_eq_1))),
+ (VAVGR_B v16i8:$vj, v16i8:$vk)>;
+def : Pat<(sra (v8i16 (add (v8i16 (add v8i16:$vj, v8i16:$vk)),
+ (v8i16 (vsplat_imm_eq_1)))),
+ (v8i16 (vsplat_imm_eq_1))),
+ (VAVGR_H v8i16:$vj, v8i16:$vk)>;
+def : Pat<(sra (v4i32 (add (v4i32 (add v4i32:$vj, v4i32:$vk)),
+ (v4i32 (vsplat_imm_eq_1)))),
+ (v4i32 (vsplat_imm_eq_1))),
+ (VAVGR_W v4i32:$vj, v4i32:$vk)>;
+def : Pat<(sra (v2i64 (add (v2i64 (add v2i64:$vj, v2i64:$vk)),
+ (v2i64 (vsplat_imm_eq_1)))),
+ (v2i64 (vsplat_imm_eq_1))),
+ (VAVGR_D v2i64:$vj, v2i64:$vk)>;
+def : Pat<(srl (v16i8 (add (v16i8 (add v16i8:$vj, v16i8:$vk)),
+ (v16i8 (vsplat_imm_eq_1)))),
+ (v16i8 (vsplat_imm_eq_1))),
+ (VAVGR_BU v16i8:$vj, v16i8:$vk)>;
+def : Pat<(srl (v8i16 (add (v8i16 (add v8i16:$vj, v8i16:$vk)),
+ (v8i16 (vsplat_imm_eq_1)))),
+ (v8i16 (vsplat_imm_eq_1))),
+ (VAVGR_HU v8i16:$vj, v8i16:$vk)>;
+def : Pat<(srl (v4i32 (add (v4i32 (add v4i32:$vj, v4i32:$vk)),
+ (v4i32 (vsplat_imm_eq_1)))),
+ (v4i32 (vsplat_imm_eq_1))),
+ (VAVGR_WU v4i32:$vj, v4i32:$vk)>;
+def : Pat<(srl (v2i64 (add (v2i64 (add v2i64:$vj, v2i64:$vk)),
+ (v2i64 (vsplat_imm_eq_1)))),
+ (v2i64 (vsplat_imm_eq_1))),
+ (VAVGR_DU v2i64:$vj, v2i64:$vk)>;
+
// VABSD_{B/H/W/D}[U]
defm : PatVrVr<abds, "VABSD">;
defm : PatVrVrU<abdu, "VABSD">;
>From 56ae70e24c741c7b367323039989bef923f4f126 Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Sun, 28 Sep 2025 20:20:13 +0800
Subject: [PATCH 2/3] update tests
---
.../LoongArch/lasx/ir-instruction/avg.ll | 146 ++++++++++--------
.../LoongArch/lsx/ir-instruction/avg.ll | 146 ++++++++++--------
2 files changed, 160 insertions(+), 132 deletions(-)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll
index 2a5a8fa05d646..5c5c19935080b 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll
@@ -1,14 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
define void @xvavg_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: xvavg_b:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1
+; CHECK-NEXT: xvavg.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -25,8 +24,7 @@ define void @xvavg_h(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1
+; CHECK-NEXT: xvavg.h $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -43,8 +41,7 @@ define void @xvavg_w(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1
+; CHECK-NEXT: xvavg.w $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -57,14 +54,22 @@ entry:
}
define void @xvavg_d(ptr %res, ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: xvavg_d:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1
-; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: xvavg_d:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvld $xr1, $a2, 0
+; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvsrai.d $xr0, $xr0, 1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvavg_d:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvld $xr1, $a2, 0
+; LA64-NEXT: xvavg.d $xr0, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -79,8 +84,7 @@ define void @xvavg_bu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1
+; CHECK-NEXT: xvavg.bu $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -97,8 +101,7 @@ define void @xvavg_hu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1
+; CHECK-NEXT: xvavg.hu $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -115,8 +118,7 @@ define void @xvavg_wu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1
+; CHECK-NEXT: xvavg.wu $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -129,14 +131,22 @@ entry:
}
define void @xvavg_du(ptr %res, ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: xvavg_du:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1
-; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: xvavg_du:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvld $xr1, $a2, 0
+; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvsrli.d $xr0, $xr0, 1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvavg_du:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvld $xr1, $a2, 0
+; LA64-NEXT: xvavg.du $xr0, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -151,9 +161,7 @@ define void @xvavgr_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
-; CHECK-NEXT: xvaddi.bu $xr0, $xr0, 1
-; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1
+; CHECK-NEXT: xvavgr.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -171,9 +179,7 @@ define void @xvavgr_h(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
-; CHECK-NEXT: xvaddi.hu $xr0, $xr0, 1
-; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1
+; CHECK-NEXT: xvavgr.h $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -191,9 +197,7 @@ define void @xvavgr_w(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
-; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 1
-; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1
+; CHECK-NEXT: xvavgr.w $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -207,15 +211,23 @@ entry:
}
define void @xvavgr_d(ptr %res, ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: xvavgr_d:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
-; CHECK-NEXT: xvaddi.du $xr0, $xr0, 1
-; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1
-; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: xvavgr_d:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvld $xr1, $a2, 0
+; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvaddi.du $xr0, $xr0, 1
+; LA32-NEXT: xvsrai.d $xr0, $xr0, 1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvavgr_d:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvld $xr1, $a2, 0
+; LA64-NEXT: xvavgr.d $xr0, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -231,9 +243,7 @@ define void @xvavgr_bu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
-; CHECK-NEXT: xvaddi.bu $xr0, $xr0, 1
-; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1
+; CHECK-NEXT: xvavgr.bu $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -251,9 +261,7 @@ define void @xvavgr_hu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
-; CHECK-NEXT: xvaddi.hu $xr0, $xr0, 1
-; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1
+; CHECK-NEXT: xvavgr.hu $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -271,9 +279,7 @@ define void @xvavgr_wu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
-; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 1
-; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1
+; CHECK-NEXT: xvavgr.wu $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -287,15 +293,23 @@ entry:
}
define void @xvavgr_du(ptr %res, ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: xvavgr_du:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
-; CHECK-NEXT: xvaddi.du $xr0, $xr0, 1
-; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1
-; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: xvavgr_du:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvld $xr1, $a2, 0
+; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvaddi.du $xr0, $xr0, 1
+; LA32-NEXT: xvsrli.d $xr0, $xr0, 1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvavgr_du:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvld $xr1, $a2, 0
+; LA64-NEXT: xvavgr.du $xr0, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll
index 20b8898436cc4..334af22edee59 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll
@@ -1,14 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
define void @vavg_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vavg_b:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1
-; CHECK-NEXT: vsrai.b $vr0, $vr0, 1
+; CHECK-NEXT: vavg.b $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -25,8 +24,7 @@ define void @vavg_h(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1
-; CHECK-NEXT: vsrai.h $vr0, $vr0, 1
+; CHECK-NEXT: vavg.h $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -43,8 +41,7 @@ define void @vavg_w(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1
-; CHECK-NEXT: vsrai.w $vr0, $vr0, 1
+; CHECK-NEXT: vavg.w $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -57,14 +54,22 @@ entry:
}
define void @vavg_d(ptr %res, ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: vavg_d:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 1
-; CHECK-NEXT: vst $vr0, $a0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vavg_d:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vld $vr0, $a1, 0
+; LA32-NEXT: vld $vr1, $a2, 0
+; LA32-NEXT: vadd.d $vr0, $vr0, $vr1
+; LA32-NEXT: vsrai.d $vr0, $vr0, 1
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vavg_d:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vld $vr1, $a2, 0
+; LA64-NEXT: vavg.d $vr0, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: ret
entry:
%va = load <2 x i64>, ptr %a
%vb = load <2 x i64>, ptr %b
@@ -79,8 +84,7 @@ define void @vavg_bu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1
-; CHECK-NEXT: vsrli.b $vr0, $vr0, 1
+; CHECK-NEXT: vavg.bu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -97,8 +101,7 @@ define void @vavg_hu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1
-; CHECK-NEXT: vsrli.h $vr0, $vr0, 1
+; CHECK-NEXT: vavg.hu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -115,8 +118,7 @@ define void @vavg_wu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1
-; CHECK-NEXT: vsrli.w $vr0, $vr0, 1
+; CHECK-NEXT: vavg.wu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -129,14 +131,22 @@ entry:
}
define void @vavg_du(ptr %res, ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: vavg_du:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
-; CHECK-NEXT: vsrli.d $vr0, $vr0, 1
-; CHECK-NEXT: vst $vr0, $a0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vavg_du:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vld $vr0, $a1, 0
+; LA32-NEXT: vld $vr1, $a2, 0
+; LA32-NEXT: vadd.d $vr0, $vr0, $vr1
+; LA32-NEXT: vsrli.d $vr0, $vr0, 1
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vavg_du:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vld $vr1, $a2, 0
+; LA64-NEXT: vavg.du $vr0, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: ret
entry:
%va = load <2 x i64>, ptr %a
%vb = load <2 x i64>, ptr %b
@@ -151,9 +161,7 @@ define void @vavgr_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1
-; CHECK-NEXT: vaddi.bu $vr0, $vr0, 1
-; CHECK-NEXT: vsrai.b $vr0, $vr0, 1
+; CHECK-NEXT: vavgr.b $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -171,9 +179,7 @@ define void @vavgr_h(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1
-; CHECK-NEXT: vaddi.hu $vr0, $vr0, 1
-; CHECK-NEXT: vsrai.h $vr0, $vr0, 1
+; CHECK-NEXT: vavgr.h $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -191,9 +197,7 @@ define void @vavgr_w(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1
-; CHECK-NEXT: vaddi.wu $vr0, $vr0, 1
-; CHECK-NEXT: vsrai.w $vr0, $vr0, 1
+; CHECK-NEXT: vavgr.w $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -207,15 +211,23 @@ entry:
}
define void @vavgr_d(ptr %res, ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: vavgr_d:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
-; CHECK-NEXT: vaddi.du $vr0, $vr0, 1
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 1
-; CHECK-NEXT: vst $vr0, $a0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vavgr_d:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vld $vr0, $a1, 0
+; LA32-NEXT: vld $vr1, $a2, 0
+; LA32-NEXT: vadd.d $vr0, $vr0, $vr1
+; LA32-NEXT: vaddi.du $vr0, $vr0, 1
+; LA32-NEXT: vsrai.d $vr0, $vr0, 1
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vavgr_d:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vld $vr1, $a2, 0
+; LA64-NEXT: vavgr.d $vr0, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: ret
entry:
%va = load <2 x i64>, ptr %a
%vb = load <2 x i64>, ptr %b
@@ -231,9 +243,7 @@ define void @vavgr_bu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1
-; CHECK-NEXT: vaddi.bu $vr0, $vr0, 1
-; CHECK-NEXT: vsrli.b $vr0, $vr0, 1
+; CHECK-NEXT: vavgr.bu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -251,9 +261,7 @@ define void @vavgr_hu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1
-; CHECK-NEXT: vaddi.hu $vr0, $vr0, 1
-; CHECK-NEXT: vsrli.h $vr0, $vr0, 1
+; CHECK-NEXT: vavgr.hu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -271,9 +279,7 @@ define void @vavgr_wu(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1
-; CHECK-NEXT: vaddi.wu $vr0, $vr0, 1
-; CHECK-NEXT: vsrli.w $vr0, $vr0, 1
+; CHECK-NEXT: vavgr.wu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -287,15 +293,23 @@ entry:
}
define void @vavgr_du(ptr %res, ptr %a, ptr %b) nounwind {
-; CHECK-LABEL: vavgr_du:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: vld $vr1, $a2, 0
-; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
-; CHECK-NEXT: vaddi.du $vr0, $vr0, 1
-; CHECK-NEXT: vsrli.d $vr0, $vr0, 1
-; CHECK-NEXT: vst $vr0, $a0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vavgr_du:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vld $vr0, $a1, 0
+; LA32-NEXT: vld $vr1, $a2, 0
+; LA32-NEXT: vadd.d $vr0, $vr0, $vr1
+; LA32-NEXT: vaddi.du $vr0, $vr0, 1
+; LA32-NEXT: vsrli.d $vr0, $vr0, 1
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vavgr_du:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vld $vr1, $a2, 0
+; LA64-NEXT: vavgr.du $vr0, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: ret
entry:
%va = load <2 x i64>, ptr %a
%vb = load <2 x i64>, ptr %b
>From 97ee0817c7752cd67213471f587979eeee66533b Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Mon, 29 Sep 2025 09:32:17 +0800
Subject: [PATCH 3/3] using multiclass
---
.../LoongArch/LoongArchLASXInstrInfo.td | 64 ++++------------
.../Target/LoongArch/LoongArchLSXInstrInfo.td | 76 +++++++------------
2 files changed, 44 insertions(+), 96 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 6eb68129d9dba..5149cf355c1fd 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -2016,54 +2016,22 @@ def : Pat<(v4i32(fp_to_uint v4f64:$vj)),
sub_128)>;
// XVAVG_{B/H/W/D/BU/HU/WU/DU}, XVAVGR_{B/H/W/D/BU/HU/WU/DU}
-def : Pat<(sra (v32i8 (add v32i8:$xj, v32i8:$xk)), (v32i8 (vsplat_imm_eq_1))),
- (XVAVG_B v32i8:$xj, v32i8:$xk)>;
-def : Pat<(sra (v16i16 (add v16i16:$xj, v16i16:$xk)), (v16i16 (vsplat_imm_eq_1))),
- (XVAVG_H v16i16:$xj, v16i16:$xk)>;
-def : Pat<(sra (v8i32 (add v8i32:$xj, v8i32:$xk)), (v8i32 (vsplat_imm_eq_1))),
- (XVAVG_W v8i32:$xj, v8i32:$xk)>;
-def : Pat<(sra (v4i64 (add v4i64:$xj, v4i64:$xk)), (v4i64 (vsplat_imm_eq_1))),
- (XVAVG_D v4i64:$xj, v4i64:$xk)>;
-def : Pat<(srl (v32i8 (add v32i8:$xj, v32i8:$xk)), (v32i8 (vsplat_imm_eq_1))),
- (XVAVG_BU v32i8:$xj, v32i8:$xk)>;
-def : Pat<(srl (v16i16 (add v16i16:$xj, v16i16:$xk)), (v16i16 (vsplat_imm_eq_1))),
- (XVAVG_HU v16i16:$xj, v16i16:$xk)>;
-def : Pat<(srl (v8i32 (add v8i32:$xj, v8i32:$xk)), (v8i32 (vsplat_imm_eq_1))),
- (XVAVG_WU v8i32:$xj, v8i32:$xk)>;
-def : Pat<(srl (v4i64 (add v4i64:$xj, v4i64:$xk)), (v4i64 (vsplat_imm_eq_1))),
- (XVAVG_DU v4i64:$xj, v4i64:$xk)>;
-def : Pat<(sra (v32i8 (add (v32i8 (add v32i8:$vj, v32i8:$vk)),
- (v32i8 (vsplat_imm_eq_1)))),
- (v32i8 (vsplat_imm_eq_1))),
- (XVAVGR_B v32i8:$vj, v32i8:$vk)>;
-def : Pat<(sra (v16i16 (add (v16i16 (add v16i16:$vj, v16i16:$vk)),
- (v16i16 (vsplat_imm_eq_1)))),
- (v16i16 (vsplat_imm_eq_1))),
- (XVAVGR_H v16i16:$vj, v16i16:$vk)>;
-def : Pat<(sra (v8i32 (add (v8i32 (add v8i32:$vj, v8i32:$vk)),
- (v8i32 (vsplat_imm_eq_1)))),
- (v8i32 (vsplat_imm_eq_1))),
- (XVAVGR_W v8i32:$vj, v8i32:$vk)>;
-def : Pat<(sra (v4i64 (add (v4i64 (add v4i64:$vj, v4i64:$vk)),
- (v4i64 (vsplat_imm_eq_1)))),
- (v4i64 (vsplat_imm_eq_1))),
- (XVAVGR_D v4i64:$vj, v4i64:$vk)>;
-def : Pat<(srl (v32i8 (add (v32i8 (add v32i8:$vj, v32i8:$vk)),
- (v32i8 (vsplat_imm_eq_1)))),
- (v32i8 (vsplat_imm_eq_1))),
- (XVAVGR_BU v32i8:$vj, v32i8:$vk)>;
-def : Pat<(srl (v16i16 (add (v16i16 (add v16i16:$vj, v16i16:$vk)),
- (v16i16 (vsplat_imm_eq_1)))),
- (v16i16 (vsplat_imm_eq_1))),
- (XVAVGR_HU v16i16:$vj, v16i16:$vk)>;
-def : Pat<(srl (v8i32 (add (v8i32 (add v8i32:$vj, v8i32:$vk)),
- (v8i32 (vsplat_imm_eq_1)))),
- (v8i32 (vsplat_imm_eq_1))),
- (XVAVGR_WU v8i32:$vj, v8i32:$vk)>;
-def : Pat<(srl (v4i64 (add (v4i64 (add v4i64:$vj, v4i64:$vk)),
- (v4i64 (vsplat_imm_eq_1)))),
- (v4i64 (vsplat_imm_eq_1))),
- (XVAVGR_DU v4i64:$vj, v4i64:$vk)>;
+defm : VAvgPat<sra, "XVAVG_B", v32i8>;
+defm : VAvgPat<sra, "XVAVG_H", v16i16>;
+defm : VAvgPat<sra, "XVAVG_W", v8i32>;
+defm : VAvgPat<sra, "XVAVG_D", v4i64>;
+defm : VAvgPat<srl, "XVAVG_BU", v32i8>;
+defm : VAvgPat<srl, "XVAVG_HU", v16i16>;
+defm : VAvgPat<srl, "XVAVG_WU", v8i32>;
+defm : VAvgPat<srl, "XVAVG_DU", v4i64>;
+defm : VAvgrPat<sra, "XVAVGR_B", v32i8>;
+defm : VAvgrPat<sra, "XVAVGR_H", v16i16>;
+defm : VAvgrPat<sra, "XVAVGR_W", v8i32>;
+defm : VAvgrPat<sra, "XVAVGR_D", v4i64>;
+defm : VAvgrPat<srl, "XVAVGR_BU", v32i8>;
+defm : VAvgrPat<srl, "XVAVGR_HU", v16i16>;
+defm : VAvgrPat<srl, "XVAVGR_WU", v8i32>;
+defm : VAvgrPat<srl, "XVAVGR_DU", v4i64>;
// XVABSD_{B/H/W/D}[U]
defm : PatXrXr<abds, "XVABSD">;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 169f0d56c223e..7932e86d1aea8 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -1517,6 +1517,18 @@ multiclass InsertExtractPatV2<ValueType vecty, ValueType elemty> {
}
}
+multiclass VAvgPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
+ def : Pat<(OpNode (vt (add vt:$vj, vt:$vk)), (vt (vsplat_imm_eq_1))),
+ (!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
+}
+
+multiclass VAvgrPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
+ def : Pat<(OpNode (vt (add (vt (add vt:$vj, vt:$vk)),
+ (vt (vsplat_imm_eq_1)))),
+ (vt (vsplat_imm_eq_1))),
+ (!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
+}
+
let Predicates = [HasExtLSX] in {
// VADD_{B/H/W/D}
@@ -2154,54 +2166,22 @@ def : Pat<(f64 f64imm_vldi:$in),
(f64 (EXTRACT_SUBREG (VLDI (to_f64imm_vldi f64imm_vldi:$in)), sub_64))>;
// VAVG_{B/H/W/D/BU/HU/WU/DU}, VAVGR_{B/H/W/D/BU/HU/WU/DU}
-def : Pat<(sra (v16i8 (add v16i8:$vj, v16i8:$vk)), (v16i8 (vsplat_imm_eq_1))),
- (VAVG_B v16i8:$vj, v16i8:$vk)>;
-def : Pat<(sra (v8i16 (add v8i16:$vj, v8i16:$vk)), (v8i16 (vsplat_imm_eq_1))),
- (VAVG_H v8i16:$vj, v8i16:$vk)>;
-def : Pat<(sra (v4i32 (add v4i32:$vj, v4i32:$vk)), (v4i32 (vsplat_imm_eq_1))),
- (VAVG_W v4i32:$vj, v4i32:$vk)>;
-def : Pat<(sra (v2i64 (add v2i64:$vj, v2i64:$vk)), (v2i64 (vsplat_imm_eq_1))),
- (VAVG_D v2i64:$vj, v2i64:$vk)>;
-def : Pat<(srl (v16i8 (add v16i8:$vj, v16i8:$vk)), (v16i8 (vsplat_imm_eq_1))),
- (VAVG_BU v16i8:$vj, v16i8:$vk)>;
-def : Pat<(srl (v8i16 (add v8i16:$vj, v8i16:$vk)), (v8i16 (vsplat_imm_eq_1))),
- (VAVG_HU v8i16:$vj, v8i16:$vk)>;
-def : Pat<(srl (v4i32 (add v4i32:$vj, v4i32:$vk)), (v4i32 (vsplat_imm_eq_1))),
- (VAVG_WU v4i32:$vj, v4i32:$vk)>;
-def : Pat<(srl (v2i64 (add v2i64:$vj, v2i64:$vk)), (v2i64 (vsplat_imm_eq_1))),
- (VAVG_DU v2i64:$vj, v2i64:$vk)>;
-def : Pat<(sra (v16i8 (add (v16i8 (add v16i8:$vj, v16i8:$vk)),
- (v16i8 (vsplat_imm_eq_1)))),
- (v16i8 (vsplat_imm_eq_1))),
- (VAVGR_B v16i8:$vj, v16i8:$vk)>;
-def : Pat<(sra (v8i16 (add (v8i16 (add v8i16:$vj, v8i16:$vk)),
- (v8i16 (vsplat_imm_eq_1)))),
- (v8i16 (vsplat_imm_eq_1))),
- (VAVGR_H v8i16:$vj, v8i16:$vk)>;
-def : Pat<(sra (v4i32 (add (v4i32 (add v4i32:$vj, v4i32:$vk)),
- (v4i32 (vsplat_imm_eq_1)))),
- (v4i32 (vsplat_imm_eq_1))),
- (VAVGR_W v4i32:$vj, v4i32:$vk)>;
-def : Pat<(sra (v2i64 (add (v2i64 (add v2i64:$vj, v2i64:$vk)),
- (v2i64 (vsplat_imm_eq_1)))),
- (v2i64 (vsplat_imm_eq_1))),
- (VAVGR_D v2i64:$vj, v2i64:$vk)>;
-def : Pat<(srl (v16i8 (add (v16i8 (add v16i8:$vj, v16i8:$vk)),
- (v16i8 (vsplat_imm_eq_1)))),
- (v16i8 (vsplat_imm_eq_1))),
- (VAVGR_BU v16i8:$vj, v16i8:$vk)>;
-def : Pat<(srl (v8i16 (add (v8i16 (add v8i16:$vj, v8i16:$vk)),
- (v8i16 (vsplat_imm_eq_1)))),
- (v8i16 (vsplat_imm_eq_1))),
- (VAVGR_HU v8i16:$vj, v8i16:$vk)>;
-def : Pat<(srl (v4i32 (add (v4i32 (add v4i32:$vj, v4i32:$vk)),
- (v4i32 (vsplat_imm_eq_1)))),
- (v4i32 (vsplat_imm_eq_1))),
- (VAVGR_WU v4i32:$vj, v4i32:$vk)>;
-def : Pat<(srl (v2i64 (add (v2i64 (add v2i64:$vj, v2i64:$vk)),
- (v2i64 (vsplat_imm_eq_1)))),
- (v2i64 (vsplat_imm_eq_1))),
- (VAVGR_DU v2i64:$vj, v2i64:$vk)>;
+defm : VAvgPat<sra, "VAVG_B", v16i8>;
+defm : VAvgPat<sra, "VAVG_H", v8i16>;
+defm : VAvgPat<sra, "VAVG_W", v4i32>;
+defm : VAvgPat<sra, "VAVG_D", v2i64>;
+defm : VAvgPat<srl, "VAVG_BU", v16i8>;
+defm : VAvgPat<srl, "VAVG_HU", v8i16>;
+defm : VAvgPat<srl, "VAVG_WU", v4i32>;
+defm : VAvgPat<srl, "VAVG_DU", v2i64>;
+defm : VAvgrPat<sra, "VAVGR_B", v16i8>;
+defm : VAvgrPat<sra, "VAVGR_H", v8i16>;
+defm : VAvgrPat<sra, "VAVGR_W", v4i32>;
+defm : VAvgrPat<sra, "VAVGR_D", v2i64>;
+defm : VAvgrPat<srl, "VAVGR_BU", v16i8>;
+defm : VAvgrPat<srl, "VAVGR_HU", v8i16>;
+defm : VAvgrPat<srl, "VAVGR_WU", v4i32>;
+defm : VAvgrPat<srl, "VAVGR_DU", v2i64>;
// VABSD_{B/H/W/D}[U]
defm : PatVrVr<abds, "VABSD">;
More information about the llvm-branch-commits
mailing list