[llvm] [Hexagon] Add HVX patterns for vector arithmetic (PR #170704)
Fateme Hosseini via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 11 08:10:24 PST 2025
https://github.com/fhossein-quic updated https://github.com/llvm/llvm-project/pull/170704
>From 259c15e0109247fd6224ac0765955473f793e340 Mon Sep 17 00:00:00 2001
From: Fateme Hosseini <quic_fhossein at quicinc.com>
Date: Thu, 4 Dec 2025 09:08:32 -0800
Subject: [PATCH] [Hexagon] Add HVX patterns for widening vector arithmetic
This patch introduces instruction selection patterns to generate the
widening vadd, vsub, and vmpy HVX vector instructions.
These patterns match standard vector add, sub, and mul operations on
sign- or zero-extended operands and lower them to the corresponding
Hexagon HVX instructions.
Patch By: Fateme Hosseini
Co-authored-by: Jyotsna Verma <jverma at qti.qualcomm.com>
---
llvm/lib/Target/Hexagon/HexagonPatterns.td | 1 +
llvm/lib/Target/Hexagon/HexagonPatternsHVX.td | 64 ++++++++
llvm/test/CodeGen/Hexagon/autohvx/arith.ll | 140 +-----------------
3 files changed, 67 insertions(+), 138 deletions(-)
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index e84070f1a5468..e84a3286eaa9a 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -380,6 +380,7 @@ multiclass NopCast_pat<ValueType Ty1, ValueType Ty2, RegisterClass RC> {
def Add: pf2<add>; def And: pf2<and>; def Sra: pf2<sra>;
def Sub: pf2<sub>; def Or: pf2<or>; def Srl: pf2<srl>;
def Mul: pf2<mul>; def Xor: pf2<xor>; def Shl: pf2<shl>;
+def Sext: pf1<sext>; def Zext: pf1<zext>;
def Smin: pf2<smin>; def Smax: pf2<smax>;
def Umin: pf2<umin>; def Umax: pf2<umax>;
diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
index 674d19176a88b..64bb93a5ca8f8 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
@@ -76,6 +76,12 @@ def VSxth: OutPatFrag<(ops node:$Vs), (V6_vunpackh $Vs)>;
def VZxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackub $Vs)>;
def VZxth: OutPatFrag<(ops node:$Vs), (V6_vunpackuh $Vs)>;
+def VShuff: OutPatFrag<(ops node:$Vs, node:$S),
+ (V6_vshuffvdd (HiVec $Vs), (LoVec $Vs), (A2_tfrsi $S))>;
+
+def VDeal: OutPatFrag<(ops node:$Vs, node:$S),
+ (V6_vdealvdd (HiVec $Vs), (LoVec $Vs), (A2_tfrsi $S))>;
+
class VSubi<InstHexagon VSub, InstHexagon VSplati>:
OutPatFrag<(ops node:$Imm, node:$Vs), (VSub (VSplati (i32 $Imm)), $Vs)>;
@@ -402,6 +408,64 @@ class Vneg1<ValueType VecTy>
class Vnot<ValueType VecTy>
: PatFrag<(ops node:$Vs), (xor $Vs, Vneg1<VecTy>)>;
+class ExtOp_pat<InstHexagon MI, PatFrag Op, PatFrag Ext, ValueType ResType,
+ PatFrag VPred, int Shuff>
+ : Pat<(ResType (Op (Ext VPred:$Vs), (Ext VPred:$Vt))),
+ (VShuff (MI VPred:$Vs, VPred:$Vt), Shuff)>;
+
+class VOpAcc_pat<InstHexagon MI, PatFrag Op, PatFrag Ext, ValueType ResType,
+ PatFrag VxPred, PatFrag VsPred, int Shuff>
+ : Pat<(ResType (add VxPred:$Vx, (Op (Ext VsPred:$Vs), (Ext VsPred:$Vt)))),
+ (VShuff (MI (VDeal $Vx, Shuff), VsPred:$Vs, VsPred:$Vt), Shuff)>;
+
+let Predicates = [UseHVX] in {
+ let AddedComplexity = 200 in {
+ def : ExtOp_pat<V6_vaddubh, Add, Zext, VecPI16, HVI8, -2>;
+ def : ExtOp_pat<V6_vadduhw, Add, Zext, VecPI32, HVI16, -4>;
+ def : ExtOp_pat<V6_vaddhw, Add, Sext, VecPI32, HVI16, -4>;
+
+ def : ExtOp_pat<V6_vsububh, Sub, Zext, VecPI16, HVI8, -2>;
+ def : ExtOp_pat<V6_vsubuhw, Sub, Zext, VecPI32, HVI16, -4>;
+ def : ExtOp_pat<V6_vsubhw, Sub, Sext, VecPI32, HVI16, -4>;
+
+ def : ExtOp_pat<V6_vmpybv, Mul, Sext, VecPI16, HVI8, -2>;
+ def : ExtOp_pat<V6_vmpyhv, Mul, Sext, VecPI32, HVI16, -4>;
+ def : ExtOp_pat<V6_vmpyubv, Mul, Zext, VecPI16, HVI8, -2>;
+ def : ExtOp_pat<V6_vmpyuhv, Mul, Zext, VecPI32, HVI16, -4>;
+
+ // The first operand in V6_vmpybusv is unsigned.
+ def : Pat<(VecPI16 (mul (VecPI16 (zext HVI8:$Vs)),
+ (VecPI16 (sext HVI8:$Vv)))),
+ (VShuff (V6_vmpybusv HVI8:$Vs, HVI8:$Vv), -2)>;
+
+ // The second operand in V6_vmpyhus is unsigned.
+ def : Pat<(VecPI32 (mul (VecPI32 (sext HVI16:$Vs)),
+ (VecPI32 (zext HVI16:$Vv)))),
+ (VShuff (V6_vmpyhus HVI16:$Vs, HVI16:$Vv), -4)>;
+
+ def : VOpAcc_pat<V6_vaddubh_acc, Add, Zext, VecPI16, HWI16, HVI8, -2>;
+ def : VOpAcc_pat<V6_vadduhw_acc, Add, Zext, VecPI32, HWI32, HVI16, -4>;
+ def : VOpAcc_pat<V6_vaddhw_acc, Add, Sext, VecPI32, HWI32, HVI16, -4>;
+
+ def : VOpAcc_pat<V6_vmpybv_acc, Mul, Sext, VecPI16, HWI16, HVI8, -2>;
+ def : VOpAcc_pat<V6_vmpyubv_acc, Mul, Zext, VecPI16, HWI16, HVI8, -2>;
+ def : VOpAcc_pat<V6_vmpyhv_acc, Mul, Sext, VecPI32, HWI32, HVI16, -4>;
+ def : VOpAcc_pat<V6_vmpyuhv_acc, Mul, Zext, VecPI32, HWI32, HVI16, -4>;
+
+ // V6_vmpybusv_acc: $Vs (first operand after the accumulator) is unsigned.
+ def : Pat<(VecPI16 (add HWI16:$Vx , (mul (VecPI16 (zext HVI8:$Vs)),
+ (VecPI16 (sext HVI8:$Vt))))),
+ (VShuff (V6_vmpybusv_acc (VDeal $Vx, -2),
+ HVI8:$Vs, HVI8:$Vt), -2)>;
+
+ // V6_vmpyhus_acc: $Vt (last operand, after the accumulator and $Vs) is unsigned.
+ def : Pat<(add HWI32:$Vx, (mul (VecPI32 (sext HVI16:$Vs)),
+ (VecPI32 (zext HVI16:$Vt)))),
+ (VShuff (V6_vmpyhus_acc (VDeal $Vx, -4),
+ HVI16:$Vs, HVI16:$Vt), -4)>;
+ }
+}
+
let Predicates = [UseHVX] in {
let AddedComplexity = 200 in {
def: Pat<(Vnot<VecI8> HVI8:$Vs), (V6_vnot HvxVR:$Vs)>;
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/arith.ll b/llvm/test/CodeGen/Hexagon/autohvx/arith.ll
index f45dce7791118..291243299c534 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/arith.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/arith.ll
@@ -132,141 +132,5 @@ define <32 x i32> @xorw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
ret <32 x i32> %p
}
-; --- add
-
-; CHECK-LABEL: addb_64:
-; CHECK: vadd(v0.b,v1.b)
-define <64 x i8> @addb_64(<64 x i8> %v0, <64 x i8> %v1) #0 {
- %p = add <64 x i8> %v0, %v1
- ret <64 x i8> %p
-}
-
-; CHECK-LABEL: addb_128:
-; CHECK: vadd(v0.b,v1.b)
-define <128 x i8> @addb_128(<128 x i8> %v0, <128 x i8> %v1) #1 {
- %p = add <128 x i8> %v0, %v1
- ret <128 x i8> %p
-}
-
-; CHECK-LABEL: addh_64:
-; CHECK: vadd(v0.h,v1.h)
-define <32 x i16> @addh_64(<32 x i16> %v0, <32 x i16> %v1) #0 {
- %p = add <32 x i16> %v0, %v1
- ret <32 x i16> %p
-}
-
-; CHECK-LABEL: addh_128:
-; CHECK: vadd(v0.h,v1.h)
-define <64 x i16> @addh_128(<64 x i16> %v0, <64 x i16> %v1) #1 {
- %p = add <64 x i16> %v0, %v1
- ret <64 x i16> %p
-}
-
-; CHECK-LABEL: addw_64:
-; CHECK: vadd(v0.w,v1.w)
-define <16 x i32> @addw_64(<16 x i32> %v0, <16 x i32> %v1) #0 {
- %p = add <16 x i32> %v0, %v1
- ret <16 x i32> %p
-}
-
-; CHECK-LABEL: addw_128:
-; CHECK: vadd(v0.w,v1.w)
-define <32 x i32> @addw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
- %p = add <32 x i32> %v0, %v1
- ret <32 x i32> %p
-}
-
-; --- sub
-
-; CHECK-LABEL: subb_64:
-; CHECK: vsub(v0.b,v1.b)
-define <64 x i8> @subb_64(<64 x i8> %v0, <64 x i8> %v1) #0 {
- %p = sub <64 x i8> %v0, %v1
- ret <64 x i8> %p
-}
-
-; CHECK-LABEL: subb_128:
-; CHECK: vsub(v0.b,v1.b)
-define <128 x i8> @subb_128(<128 x i8> %v0, <128 x i8> %v1) #1 {
- %p = sub <128 x i8> %v0, %v1
- ret <128 x i8> %p
-}
-
-; CHECK-LABEL: subh_64:
-; CHECK: vsub(v0.h,v1.h)
-define <32 x i16> @subh_64(<32 x i16> %v0, <32 x i16> %v1) #0 {
- %p = sub <32 x i16> %v0, %v1
- ret <32 x i16> %p
-}
-
-; CHECK-LABEL: subh_128:
-; CHECK: vsub(v0.h,v1.h)
-define <64 x i16> @subh_128(<64 x i16> %v0, <64 x i16> %v1) #1 {
- %p = sub <64 x i16> %v0, %v1
- ret <64 x i16> %p
-}
-
-; CHECK-LABEL: subw_64:
-; CHECK: vsub(v0.w,v1.w)
-define <16 x i32> @subw_64(<16 x i32> %v0, <16 x i32> %v1) #0 {
- %p = sub <16 x i32> %v0, %v1
- ret <16 x i32> %p
-}
-
-; CHECK-LABEL: subw_128:
-; CHECK: vsub(v0.w,v1.w)
-define <32 x i32> @subw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
- %p = sub <32 x i32> %v0, %v1
- ret <32 x i32> %p
-}
-
-; --- mul
-
-; CHECK-LABEL: mpyb_64:
-; CHECK: v[[H0:[0-9]+]]:[[L0:[0-9]+]].h = vmpy(v0.b,v1.b)
-; CHECK: vshuffe(v[[H0]].b,v[[L0]].b)
-define <64 x i8> @mpyb_64(<64 x i8> %v0, <64 x i8> %v1) #0 {
- %p = mul <64 x i8> %v0, %v1
- ret <64 x i8> %p
-}
-
-; CHECK-LABEL: mpyb_128:
-; CHECK: v[[H0:[0-9]+]]:[[L0:[0-9]+]].h = vmpy(v0.b,v1.b)
-; CHECK: vshuffe(v[[H0]].b,v[[L0]].b)
-define <128 x i8> @mpyb_128(<128 x i8> %v0, <128 x i8> %v1) #1 {
- %p = mul <128 x i8> %v0, %v1
- ret <128 x i8> %p
-}
-
-; CHECK-LABEL: mpyh_64:
-; CHECK: vmpyi(v0.h,v1.h)
-define <32 x i16> @mpyh_64(<32 x i16> %v0, <32 x i16> %v1) #0 {
- %p = mul <32 x i16> %v0, %v1
- ret <32 x i16> %p
-}
-
-; CHECK-LABEL: mpyh_128:
-; CHECK: vmpyi(v0.h,v1.h)
-define <64 x i16> @mpyh_128(<64 x i16> %v0, <64 x i16> %v1) #1 {
- %p = mul <64 x i16> %v0, %v1
- ret <64 x i16> %p
-}
-
-; CHECK-LABEL: mpyw_64:
-; CHECK: v[[V0:[0-9]+]].w = vmpyieo(v0.h,v1.h)
-; CHECK: v[[V0]].w += vmpyie(v0.w,v1.uh)
-define <16 x i32> @mpyw_64(<16 x i32> %v0, <16 x i32> %v1) #0 {
- %p = mul <16 x i32> %v0, %v1
- ret <16 x i32> %p
-}
-
-; CHECK-LABEL: mpyw_128:
-; CHECK: v[[V0:[0-9]+]].w = vmpyieo(v0.h,v1.h)
-; CHECK: v[[V0]].w += vmpyie(v0.w,v1.uh)
-define <32 x i32> @mpyw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
- %p = mul <32 x i32> %v0, %v1
- ret <32 x i32> %p
-}
-
-attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" }
-attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" }
+attributes #0 = { nounwind "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length64b" }
+attributes #1 = { nounwind "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length128b" }
More information about the llvm-commits
mailing list