[llvm] [Hexagon] Add HVX patterns for vector arithmetic (PR #170704)

Fateme Hosseini via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 4 15:19:17 PST 2025


https://github.com/fhossein-quic updated https://github.com/llvm/llvm-project/pull/170704

>From e237e1c4719be728a4cbb7987c8123c06c7a384a Mon Sep 17 00:00:00 2001
From: Fateme Hosseini <quic_fhossein at quicinc.com>
Date: Thu, 4 Dec 2025 09:08:32 -0800
Subject: [PATCH] [Hexagon] Add HVX patterns for vector arithmetic

This patch introduces instruction selection patterns to generate the
vsub, vadd, vmpy, vmin, and vmax HVX vector instructions.
The patterns match standard IR-level vector operations and lower
them to the corresponding Hexagon HVX instructions.

Patch By: Fateme Hosseini

Co-authored-by: Jyotsna Verma <jverma at qti.qualcomm.com>
---
 llvm/lib/Target/Hexagon/HexagonPatterns.td    |   1 +
 llvm/lib/Target/Hexagon/HexagonPatternsHVX.td |  64 +++++
 llvm/test/CodeGen/Hexagon/autohvx/arith.ll    | 140 +---------
 .../Hexagon/autohvx/vector-compare-128b.ll    | 248 +++++++++++++-----
 .../Hexagon/autohvx/vector-compare-64b.ll     |   2 +-
 5 files changed, 257 insertions(+), 198 deletions(-)
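
For reference, a minimal sketch of the kind of IR the new widening patterns
are meant to match (illustrative only, not part of the patch; the function
name and attribute group are placeholders):

  ; Widening unsigned add: both operands are zero-extended from bytes to
  ; halfwords before the add, which the ExtOp_pat for V6_vaddubh covers in
  ; 128-byte HVX mode.
  define <128 x i16> @widen_addub(<128 x i8> %a, <128 x i8> %b) #0 {
    %ea = zext <128 x i8> %a to <128 x i16>
    %eb = zext <128 x i8> %b to <128 x i16>
    %s = add <128 x i16> %ea, %eb
    ret <128 x i16> %s
  }

  attributes #0 = { nounwind "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length128b" }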

diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index e84070f1a5468..e84a3286eaa9a 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -380,6 +380,7 @@ multiclass NopCast_pat<ValueType Ty1, ValueType Ty2, RegisterClass RC> {
 def Add: pf2<add>;    def And: pf2<and>;    def Sra: pf2<sra>;
 def Sub: pf2<sub>;    def Or:  pf2<or>;     def Srl: pf2<srl>;
 def Mul: pf2<mul>;    def Xor: pf2<xor>;    def Shl: pf2<shl>;
+def Sext: pf1<sext>;  def Zext: pf1<zext>;
 
 def Smin: pf2<smin>;  def Smax: pf2<smax>;
 def Umin: pf2<umin>;  def Umax: pf2<umax>;
diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
index 674d19176a88b..64bb93a5ca8f8 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
@@ -76,6 +76,12 @@ def VSxth: OutPatFrag<(ops node:$Vs), (V6_vunpackh  $Vs)>;
 def VZxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackub $Vs)>;
 def VZxth: OutPatFrag<(ops node:$Vs), (V6_vunpackuh $Vs)>;
 
+def VShuff: OutPatFrag<(ops node:$Vs, node:$S),
+                       (V6_vshuffvdd (HiVec $Vs), (LoVec $Vs), (A2_tfrsi $S))>;
+
+def VDeal: OutPatFrag<(ops node:$Vs, node:$S),
+                      (V6_vdealvdd (HiVec $Vs), (LoVec $Vs), (A2_tfrsi $S))>;
+
 class VSubi<InstHexagon VSub, InstHexagon VSplati>:
   OutPatFrag<(ops node:$Imm, node:$Vs), (VSub (VSplati (i32 $Imm)), $Vs)>;
 
@@ -402,6 +408,64 @@ class Vneg1<ValueType VecTy>
 class Vnot<ValueType VecTy>
   : PatFrag<(ops node:$Vs), (xor $Vs, Vneg1<VecTy>)>;
 
+class ExtOp_pat<InstHexagon MI, PatFrag Op, PatFrag Ext, ValueType ResType,
+                 PatFrag VPred, int Shuff>
+  : Pat<(ResType (Op (Ext VPred:$Vs), (Ext VPred:$Vt))),
+        (VShuff (MI VPred:$Vs, VPred:$Vt), Shuff)>;
+
+class VOpAcc_pat<InstHexagon MI, PatFrag Op, PatFrag Ext, ValueType ResType,
+                  PatFrag VxPred, PatFrag VsPred, int Shuff>
+  : Pat<(ResType (add VxPred:$Vx, (Op (Ext VsPred:$Vs), (Ext VsPred:$Vt)))),
+        (VShuff (MI (VDeal $Vx, Shuff), VsPred:$Vs, VsPred:$Vt), Shuff)>;
+
+let Predicates = [UseHVX] in {
+  let AddedComplexity = 200 in {
+    def : ExtOp_pat<V6_vaddubh, Add, Zext, VecPI16, HVI8, -2>;
+    def : ExtOp_pat<V6_vadduhw, Add, Zext, VecPI32, HVI16, -4>;
+    def : ExtOp_pat<V6_vaddhw, Add, Sext, VecPI32, HVI16, -4>;
+
+    def : ExtOp_pat<V6_vsububh, Sub, Zext, VecPI16, HVI8, -2>;
+    def : ExtOp_pat<V6_vsubuhw, Sub, Zext, VecPI32, HVI16, -4>;
+    def : ExtOp_pat<V6_vsubhw, Sub, Sext, VecPI32, HVI16, -4>;
+
+    def : ExtOp_pat<V6_vmpybv, Mul, Sext, VecPI16, HVI8, -2>;
+    def : ExtOp_pat<V6_vmpyhv, Mul, Sext, VecPI32, HVI16, -4>;
+    def : ExtOp_pat<V6_vmpyubv, Mul, Zext, VecPI16, HVI8, -2>;
+    def : ExtOp_pat<V6_vmpyuhv, Mul, Zext, VecPI32, HVI16, -4>;
+
+    // The first operand in V6_vmpybusv is unsigned.
+    def : Pat<(VecPI16 (mul (VecPI16 (zext HVI8:$Vs)),
+                            (VecPI16 (sext HVI8:$Vv)))),
+              (VShuff (V6_vmpybusv HVI8:$Vs, HVI8:$Vv), -2)>;
+
+    // The second operand in V6_vmpyhus is unsigned.
+    def : Pat<(VecPI32 (mul (VecPI32 (sext HVI16:$Vs)),
+                            (VecPI32 (zext HVI16:$Vv)))),
+              (VShuff (V6_vmpyhus HVI16:$Vs, HVI16:$Vv), -4)>;
+
+    def : VOpAcc_pat<V6_vaddubh_acc, Add, Zext, VecPI16, HWI16, HVI8, -2>;
+    def : VOpAcc_pat<V6_vadduhw_acc, Add, Zext, VecPI32, HWI32, HVI16, -4>;
+    def : VOpAcc_pat<V6_vaddhw_acc, Add, Sext, VecPI32, HWI32, HVI16, -4>;
+
+    def : VOpAcc_pat<V6_vmpybv_acc, Mul, Sext, VecPI16, HWI16, HVI8, -2>;
+    def : VOpAcc_pat<V6_vmpyubv_acc, Mul, Zext, VecPI16, HWI16, HVI8, -2>;
+    def : VOpAcc_pat<V6_vmpyhv_acc, Mul, Sext, VecPI32, HWI32, HVI16, -4>;
+    def : VOpAcc_pat<V6_vmpyuhv_acc, Mul, Zext, VecPI32, HWI32, HVI16, -4>;
+
+    // The second operand in V6_vmpybusv_acc is unsigned.
+    def : Pat<(VecPI16 (add HWI16:$Vx, (mul (VecPI16 (zext HVI8:$Vs)),
+                                             (VecPI16 (sext HVI8:$Vt))))),
+              (VShuff (V6_vmpybusv_acc (VDeal $Vx, -2),
+                                       HVI8:$Vs, HVI8:$Vt), -2)>;
+
+    // The third operand in V6_vmpyhus_acc is unsigned.
+    def : Pat<(add HWI32:$Vx, (mul (VecPI32 (sext HVI16:$Vs)),
+                                   (VecPI32 (zext HVI16:$Vt)))),
+              (VShuff (V6_vmpyhus_acc (VDeal $Vx, -4),
+                                      HVI16:$Vs, HVI16:$Vt), -4)>;
+  }
+}
+
 let Predicates = [UseHVX] in {
   let AddedComplexity = 200 in {
     def: Pat<(Vnot<VecI8>   HVI8:$Vs), (V6_vnot HvxVR:$Vs)>;
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/arith.ll b/llvm/test/CodeGen/Hexagon/autohvx/arith.ll
index f45dce7791118..291243299c534 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/arith.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/arith.ll
@@ -132,141 +132,5 @@ define <32 x i32> @xorw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
   ret <32 x i32> %p
 }
 
-; --- add
-
-; CHECK-LABEL: addb_64:
-; CHECK: vadd(v0.b,v1.b)
-define <64 x i8> @addb_64(<64 x i8> %v0, <64 x i8> %v1) #0 {
-  %p = add <64 x i8> %v0, %v1
-  ret <64 x i8> %p
-}
-
-; CHECK-LABEL: addb_128:
-; CHECK: vadd(v0.b,v1.b)
-define <128 x i8> @addb_128(<128 x i8> %v0, <128 x i8> %v1) #1 {
-  %p = add <128 x i8> %v0, %v1
-  ret <128 x i8> %p
-}
-
-; CHECK-LABEL: addh_64:
-; CHECK: vadd(v0.h,v1.h)
-define <32 x i16> @addh_64(<32 x i16> %v0, <32 x i16> %v1) #0 {
-  %p = add <32 x i16> %v0, %v1
-  ret <32 x i16> %p
-}
-
-; CHECK-LABEL: addh_128:
-; CHECK: vadd(v0.h,v1.h)
-define <64 x i16> @addh_128(<64 x i16> %v0, <64 x i16> %v1) #1 {
-  %p = add <64 x i16> %v0, %v1
-  ret <64 x i16> %p
-}
-
-; CHECK-LABEL: addw_64:
-; CHECK: vadd(v0.w,v1.w)
-define <16 x i32> @addw_64(<16 x i32> %v0, <16 x i32> %v1) #0 {
-  %p = add <16 x i32> %v0, %v1
-  ret <16 x i32> %p
-}
-
-; CHECK-LABEL: addw_128:
-; CHECK: vadd(v0.w,v1.w)
-define <32 x i32> @addw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
-  %p = add <32 x i32> %v0, %v1
-  ret <32 x i32> %p
-}
-
-; --- sub
-
-; CHECK-LABEL: subb_64:
-; CHECK: vsub(v0.b,v1.b)
-define <64 x i8> @subb_64(<64 x i8> %v0, <64 x i8> %v1) #0 {
-  %p = sub <64 x i8> %v0, %v1
-  ret <64 x i8> %p
-}
-
-; CHECK-LABEL: subb_128:
-; CHECK: vsub(v0.b,v1.b)
-define <128 x i8> @subb_128(<128 x i8> %v0, <128 x i8> %v1) #1 {
-  %p = sub <128 x i8> %v0, %v1
-  ret <128 x i8> %p
-}
-
-; CHECK-LABEL: subh_64:
-; CHECK: vsub(v0.h,v1.h)
-define <32 x i16> @subh_64(<32 x i16> %v0, <32 x i16> %v1) #0 {
-  %p = sub <32 x i16> %v0, %v1
-  ret <32 x i16> %p
-}
-
-; CHECK-LABEL: subh_128:
-; CHECK: vsub(v0.h,v1.h)
-define <64 x i16> @subh_128(<64 x i16> %v0, <64 x i16> %v1) #1 {
-  %p = sub <64 x i16> %v0, %v1
-  ret <64 x i16> %p
-}
-
-; CHECK-LABEL: subw_64:
-; CHECK: vsub(v0.w,v1.w)
-define <16 x i32> @subw_64(<16 x i32> %v0, <16 x i32> %v1) #0 {
-  %p = sub <16 x i32> %v0, %v1
-  ret <16 x i32> %p
-}
-
-; CHECK-LABEL: subw_128:
-; CHECK: vsub(v0.w,v1.w)
-define <32 x i32> @subw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
-  %p = sub <32 x i32> %v0, %v1
-  ret <32 x i32> %p
-}
-
-; --- mul
-
-; CHECK-LABEL: mpyb_64:
-; CHECK: v[[H0:[0-9]+]]:[[L0:[0-9]+]].h = vmpy(v0.b,v1.b)
-; CHECK: vshuffe(v[[H0]].b,v[[L0]].b)
-define <64 x i8> @mpyb_64(<64 x i8> %v0, <64 x i8> %v1) #0 {
-  %p = mul <64 x i8> %v0, %v1
-  ret <64 x i8> %p
-}
-
-; CHECK-LABEL: mpyb_128:
-; CHECK: v[[H0:[0-9]+]]:[[L0:[0-9]+]].h = vmpy(v0.b,v1.b)
-; CHECK: vshuffe(v[[H0]].b,v[[L0]].b)
-define <128 x i8> @mpyb_128(<128 x i8> %v0, <128 x i8> %v1) #1 {
-  %p = mul <128 x i8> %v0, %v1
-  ret <128 x i8> %p
-}
-
-; CHECK-LABEL: mpyh_64:
-; CHECK: vmpyi(v0.h,v1.h)
-define <32 x i16> @mpyh_64(<32 x i16> %v0, <32 x i16> %v1) #0 {
-  %p = mul <32 x i16> %v0, %v1
-  ret <32 x i16> %p
-}
-
-; CHECK-LABEL: mpyh_128:
-; CHECK: vmpyi(v0.h,v1.h)
-define <64 x i16> @mpyh_128(<64 x i16> %v0, <64 x i16> %v1) #1 {
-  %p = mul <64 x i16> %v0, %v1
-  ret <64 x i16> %p
-}
-
-; CHECK-LABEL: mpyw_64:
-; CHECK: v[[V0:[0-9]+]].w = vmpyieo(v0.h,v1.h)
-; CHECK: v[[V0]].w += vmpyie(v0.w,v1.uh)
-define <16 x i32> @mpyw_64(<16 x i32> %v0, <16 x i32> %v1) #0 {
-  %p = mul <16 x i32> %v0, %v1
-  ret <16 x i32> %p
-}
-
-; CHECK-LABEL: mpyw_128:
-; CHECK: v[[V0:[0-9]+]].w = vmpyieo(v0.h,v1.h)
-; CHECK: v[[V0]].w += vmpyie(v0.w,v1.uh)
-define <32 x i32> @mpyw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
-  %p = mul <32 x i32> %v0, %v1
-  ret <32 x i32> %p
-}
-
-attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" }
-attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" }
+attributes #0 = { nounwind "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length64b" }
+attributes #1 = { nounwind "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length128b" }
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-128b.ll b/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-128b.ll
index a9483037e14b1..94e746513490a 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-128b.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-128b.ll
@@ -1,11 +1,11 @@
-; RUN: llc -mtriple=hexagon < %s | FileCheck %s
+; RUN: llc -mtriple=hexagon -mattr=+hvxv73,+hvx-length128b < %s | FileCheck %s
 
 ; --- Byte
 
 ; CHECK-LABEL: test_00:
 ; CHECK: q[[Q000:[0-3]]] = vcmp.eq(v0.b,v1.b)
 ; CHECK: v0 = vmux(q[[Q000]],v1,v2)
-define <128 x i8> @test_00(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
+define <128 x i8> @test_00(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) {
   %t0 = icmp eq <128 x i8> %v0, %v1
   %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v2
   ret <128 x i8> %t1
@@ -14,7 +14,7 @@ define <128 x i8> @test_00(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
 ; CHECK-LABEL: test_01:
 ; CHECK: q[[Q010:[0-3]]] = vcmp.eq(v0.b,v1.b)
 ; CHECK: v0 = vmux(q[[Q010]],v2,v1)
-define <128 x i8> @test_01(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
+define <128 x i8> @test_01(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) {
   %t0 = icmp ne <128 x i8> %v0, %v1
   %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v2
   ret <128 x i8> %t1
@@ -23,7 +23,7 @@ define <128 x i8> @test_01(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
 ; CHECK-LABEL: test_02:
 ; CHECK: q[[Q020:[0-3]]] = vcmp.gt(v1.b,v0.b)
 ; CHECK: v0 = vmux(q[[Q020]],v1,v2)
-define <128 x i8> @test_02(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
+define <128 x i8> @test_02(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) {
   %t0 = icmp slt <128 x i8> %v0, %v1
   %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v2
   ret <128 x i8> %t1
@@ -32,7 +32,7 @@ define <128 x i8> @test_02(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
 ; CHECK-LABEL: test_03:
 ; CHECK: q[[Q030:[0-3]]] = vcmp.gt(v0.b,v1.b)
 ; CHECK: v0 = vmux(q[[Q030]],v2,v1)
-define <128 x i8> @test_03(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
+define <128 x i8> @test_03(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) {
   %t0 = icmp sle <128 x i8> %v0, %v1
   %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v2
   ret <128 x i8> %t1
@@ -41,7 +41,7 @@ define <128 x i8> @test_03(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
 ; CHECK-LABEL: test_04:
 ; CHECK: q[[Q040:[0-3]]] = vcmp.gt(v0.b,v1.b)
 ; CHECK: v0 = vmux(q[[Q040]],v1,v2)
-define <128 x i8> @test_04(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
+define <128 x i8> @test_04(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) {
   %t0 = icmp sgt <128 x i8> %v0, %v1
   %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v2
   ret <128 x i8> %t1
@@ -50,7 +50,7 @@ define <128 x i8> @test_04(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
 ; CHECK-LABEL: test_05:
 ; CHECK: q[[Q050:[0-3]]] = vcmp.gt(v1.b,v0.b)
 ; CHECK: v0 = vmux(q[[Q050]],v2,v1)
-define <128 x i8> @test_05(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
+define <128 x i8> @test_05(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) {
   %t0 = icmp sge <128 x i8> %v0, %v1
   %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v2
   ret <128 x i8> %t1
@@ -59,7 +59,7 @@ define <128 x i8> @test_05(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
 ; CHECK-LABEL: test_06:
 ; CHECK: q[[Q060:[0-3]]] = vcmp.gt(v1.ub,v0.ub)
 ; CHECK: v0 = vmux(q[[Q060]],v1,v2)
-define <128 x i8> @test_06(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
+define <128 x i8> @test_06(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) {
   %t0 = icmp ult <128 x i8> %v0, %v1
   %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v2
   ret <128 x i8> %t1
@@ -68,7 +68,7 @@ define <128 x i8> @test_06(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
 ; CHECK-LABEL: test_07:
 ; CHECK: q[[Q070:[0-3]]] = vcmp.gt(v0.ub,v1.ub)
 ; CHECK: v0 = vmux(q[[Q070]],v2,v1)
-define <128 x i8> @test_07(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
+define <128 x i8> @test_07(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) {
   %t0 = icmp ule <128 x i8> %v0, %v1
   %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v2
   ret <128 x i8> %t1
@@ -77,7 +77,7 @@ define <128 x i8> @test_07(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
 ; CHECK-LABEL: test_08:
 ; CHECK: q[[Q080:[0-3]]] = vcmp.gt(v0.ub,v1.ub)
 ; CHECK: v0 = vmux(q[[Q080]],v1,v2)
-define <128 x i8> @test_08(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
+define <128 x i8> @test_08(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) {
   %t0 = icmp ugt <128 x i8> %v0, %v1
   %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v2
   ret <128 x i8> %t1
@@ -86,7 +86,7 @@ define <128 x i8> @test_08(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
 ; CHECK-LABEL: test_09:
 ; CHECK: q[[Q090:[0-3]]] = vcmp.gt(v1.ub,v0.ub)
 ; CHECK: v0 = vmux(q[[Q090]],v2,v1)
-define <128 x i8> @test_09(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
+define <128 x i8> @test_09(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) {
   %t0 = icmp uge <128 x i8> %v0, %v1
   %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v2
   ret <128 x i8> %t1
@@ -95,7 +95,7 @@ define <128 x i8> @test_09(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
 ; CHECK-LABEL: test_0a:
 ; CHECK: q[[Q0A0:[0-3]]] &= vcmp.eq(v0.b,v1.b)
 ; CHECK: v0 = vmux(q[[Q0A0]],v0,v1)
-define <128 x i8> @test_0a(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
+define <128 x i8> @test_0a(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) {
   %q0 = icmp eq <128 x i8> %v0, %v1
   %q1 = trunc <128 x i8> %v2 to <128 x i1>
   %q2 = and <128 x i1> %q0, %q1
@@ -106,7 +106,7 @@ define <128 x i8> @test_0a(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
 ; CHECK-LABEL: test_0b:
 ; CHECK: q[[Q0B0:[0-3]]] |= vcmp.eq(v0.b,v1.b)
 ; CHECK: v0 = vmux(q[[Q0B0]],v0,v1)
-define <128 x i8> @test_0b(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
+define <128 x i8> @test_0b(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) {
   %q0 = icmp eq <128 x i8> %v0, %v1
   %q1 = trunc <128 x i8> %v2 to <128 x i1>
   %q2 = or <128 x i1> %q0, %q1
@@ -117,7 +117,7 @@ define <128 x i8> @test_0b(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
 ; CHECK-LABEL: test_0c:
 ; CHECK: q[[Q0C0:[0-3]]] ^= vcmp.eq(v0.b,v1.b)
 ; CHECK: v0 = vmux(q[[Q0C0]],v0,v1)
-define <128 x i8> @test_0c(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
+define <128 x i8> @test_0c(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) {
   %q0 = icmp eq <128 x i8> %v0, %v1
   %q1 = trunc <128 x i8> %v2 to <128 x i1>
   %q2 = xor <128 x i1> %q0, %q1
@@ -128,7 +128,7 @@ define <128 x i8> @test_0c(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
 ; CHECK-LABEL: test_0d:
 ; CHECK: q[[Q0D0:[0-3]]] &= vcmp.gt(v0.b,v1.b)
 ; CHECK: v0 = vmux(q[[Q0D0]],v0,v1)
-define <128 x i8> @test_0d(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
+define <128 x i8> @test_0d(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) {
   %q0 = icmp sgt <128 x i8> %v0, %v1
   %q1 = trunc <128 x i8> %v2 to <128 x i1>
   %q2 = and <128 x i1> %q0, %q1
@@ -139,7 +139,7 @@ define <128 x i8> @test_0d(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
 ; CHECK-LABEL: test_0e:
 ; CHECK: q[[Q0E0:[0-3]]] |= vcmp.gt(v0.b,v1.b)
 ; CHECK: v0 = vmux(q[[Q0E0]],v0,v1)
-define <128 x i8> @test_0e(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
+define <128 x i8> @test_0e(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) {
   %q0 = icmp sgt <128 x i8> %v0, %v1
   %q1 = trunc <128 x i8> %v2 to <128 x i1>
   %q2 = or <128 x i1> %q0, %q1
@@ -150,7 +150,7 @@ define <128 x i8> @test_0e(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
 ; CHECK-LABEL: test_0f:
 ; CHECK: q[[Q0F0:[0-3]]] ^= vcmp.gt(v0.b,v1.b)
 ; CHECK: v0 = vmux(q[[Q0F0]],v0,v1)
-define <128 x i8> @test_0f(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
+define <128 x i8> @test_0f(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) {
   %q0 = icmp sgt <128 x i8> %v0, %v1
   %q1 = trunc <128 x i8> %v2 to <128 x i1>
   %q2 = xor <128 x i1> %q0, %q1
@@ -161,7 +161,7 @@ define <128 x i8> @test_0f(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
 ; CHECK-LABEL: test_0g:
 ; CHECK: q[[Q0G0:[0-3]]] &= vcmp.gt(v0.ub,v1.ub)
 ; CHECK: v0 = vmux(q[[Q0G0]],v0,v1)
-define <128 x i8> @test_0g(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
+define <128 x i8> @test_0g(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) {
   %q0 = icmp ugt <128 x i8> %v0, %v1
   %q1 = trunc <128 x i8> %v2 to <128 x i1>
   %q2 = and <128 x i1> %q0, %q1
@@ -172,7 +172,7 @@ define <128 x i8> @test_0g(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
 ; CHECK-LABEL: test_0h:
 ; CHECK: q[[Q0H0:[0-3]]] |= vcmp.gt(v0.ub,v1.ub)
 ; CHECK: v0 = vmux(q[[Q0H0]],v0,v1)
-define <128 x i8> @test_0h(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
+define <128 x i8> @test_0h(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) {
   %q0 = icmp ugt <128 x i8> %v0, %v1
   %q1 = trunc <128 x i8> %v2 to <128 x i1>
   %q2 = or <128 x i1> %q0, %q1
@@ -183,7 +183,7 @@ define <128 x i8> @test_0h(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
 ; CHECK-LABEL: test_0i:
 ; CHECK: q[[Q0I0:[0-3]]] ^= vcmp.gt(v0.ub,v1.ub)
 ; CHECK: v0 = vmux(q[[Q0I0]],v0,v1)
-define <128 x i8> @test_0i(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
+define <128 x i8> @test_0i(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) {
   %q0 = icmp ugt <128 x i8> %v0, %v1
   %q1 = trunc <128 x i8> %v2 to <128 x i1>
   %q2 = xor <128 x i1> %q0, %q1
@@ -196,7 +196,7 @@ define <128 x i8> @test_0i(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 {
 ; CHECK-LABEL: test_10:
 ; CHECK: q[[Q100:[0-3]]] = vcmp.eq(v0.h,v1.h)
 ; CHECK: v0 = vmux(q[[Q100]],v1,v2)
-define <64 x i16> @test_10(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
+define <64 x i16> @test_10(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) {
   %t0 = icmp eq <64 x i16> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v2
   ret <64 x i16> %t1
@@ -205,7 +205,7 @@ define <64 x i16> @test_10(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
 ; CHECK-LABEL: test_11:
 ; CHECK: q[[Q110:[0-3]]] = vcmp.eq(v0.h,v1.h)
 ; CHECK: v0 = vmux(q[[Q110]],v2,v1)
-define <64 x i16> @test_11(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
+define <64 x i16> @test_11(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) {
   %t0 = icmp ne <64 x i16> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v2
   ret <64 x i16> %t1
@@ -214,7 +214,7 @@ define <64 x i16> @test_11(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
 ; CHECK-LABEL: test_12:
 ; CHECK: q[[Q120:[0-3]]] = vcmp.gt(v1.h,v0.h)
 ; CHECK: v0 = vmux(q[[Q120]],v1,v2)
-define <64 x i16> @test_12(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
+define <64 x i16> @test_12(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) {
   %t0 = icmp slt <64 x i16> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v2
   ret <64 x i16> %t1
@@ -223,7 +223,7 @@ define <64 x i16> @test_12(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
 ; CHECK-LABEL: test_13:
 ; CHECK: q[[Q130:[0-3]]] = vcmp.gt(v0.h,v1.h)
 ; CHECK: v0 = vmux(q[[Q130]],v2,v1)
-define <64 x i16> @test_13(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
+define <64 x i16> @test_13(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) {
   %t0 = icmp sle <64 x i16> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v2
   ret <64 x i16> %t1
@@ -232,7 +232,7 @@ define <64 x i16> @test_13(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
 ; CHECK-LABEL: test_14:
 ; CHECK: q[[Q140:[0-3]]] = vcmp.gt(v0.h,v1.h)
 ; CHECK: v0 = vmux(q[[Q140]],v1,v2)
-define <64 x i16> @test_14(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
+define <64 x i16> @test_14(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) {
   %t0 = icmp sgt <64 x i16> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v2
   ret <64 x i16> %t1
@@ -241,7 +241,7 @@ define <64 x i16> @test_14(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
 ; CHECK-LABEL: test_15:
 ; CHECK: q[[Q150:[0-3]]] = vcmp.gt(v1.h,v0.h)
 ; CHECK: v0 = vmux(q[[Q150]],v2,v1)
-define <64 x i16> @test_15(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
+define <64 x i16> @test_15(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) {
   %t0 = icmp sge <64 x i16> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v2
   ret <64 x i16> %t1
@@ -250,7 +250,7 @@ define <64 x i16> @test_15(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
 ; CHECK-LABEL: test_16:
 ; CHECK: q[[Q160:[0-3]]] = vcmp.gt(v1.uh,v0.uh)
 ; CHECK: v0 = vmux(q[[Q160]],v1,v2)
-define <64 x i16> @test_16(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
+define <64 x i16> @test_16(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) {
   %t0 = icmp ult <64 x i16> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v2
   ret <64 x i16> %t1
@@ -259,7 +259,7 @@ define <64 x i16> @test_16(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
 ; CHECK-LABEL: test_17:
 ; CHECK: q[[Q170:[0-3]]] = vcmp.gt(v0.uh,v1.uh)
 ; CHECK: v0 = vmux(q[[Q170]],v2,v1)
-define <64 x i16> @test_17(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
+define <64 x i16> @test_17(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) {
   %t0 = icmp ule <64 x i16> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v2
   ret <64 x i16> %t1
@@ -268,7 +268,7 @@ define <64 x i16> @test_17(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
 ; CHECK-LABEL: test_18:
 ; CHECK: q[[Q180:[0-3]]] = vcmp.gt(v0.uh,v1.uh)
 ; CHECK: v0 = vmux(q[[Q180]],v1,v2)
-define <64 x i16> @test_18(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
+define <64 x i16> @test_18(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) {
   %t0 = icmp ugt <64 x i16> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v2
   ret <64 x i16> %t1
@@ -277,7 +277,7 @@ define <64 x i16> @test_18(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
 ; CHECK-LABEL: test_19:
 ; CHECK: q[[Q190:[0-3]]] = vcmp.gt(v1.uh,v0.uh)
 ; CHECK: v0 = vmux(q[[Q190]],v2,v1)
-define <64 x i16> @test_19(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
+define <64 x i16> @test_19(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) {
   %t0 = icmp uge <64 x i16> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v2
   ret <64 x i16> %t1
@@ -286,7 +286,7 @@ define <64 x i16> @test_19(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
 ; CHECK-LABEL: test_1a:
 ; CHECK: q[[Q1A0:[0-3]]] &= vcmp.eq(v0.h,v1.h)
 ; CHECK: v0 = vmux(q[[Q1A0]],v0,v1)
-define <64 x i16> @test_1a(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
+define <64 x i16> @test_1a(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) {
   %q0 = icmp eq <64 x i16> %v0, %v1
   %q1 = trunc <64 x i16> %v2 to <64 x i1>
   %q2 = and <64 x i1> %q0, %q1
@@ -297,7 +297,7 @@ define <64 x i16> @test_1a(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
 ; CHECK-LABEL: test_1b:
 ; CHECK: q[[Q1B0:[0-3]]] |= vcmp.eq(v0.h,v1.h)
 ; CHECK: v0 = vmux(q[[Q1B0]],v0,v1)
-define <64 x i16> @test_1b(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
+define <64 x i16> @test_1b(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) {
   %q0 = icmp eq <64 x i16> %v0, %v1
   %q1 = trunc <64 x i16> %v2 to <64 x i1>
   %q2 = or <64 x i1> %q0, %q1
@@ -308,7 +308,7 @@ define <64 x i16> @test_1b(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
 ; CHECK-LABEL: test_1c:
 ; CHECK: q[[Q1C0:[0-3]]] ^= vcmp.eq(v0.h,v1.h)
 ; CHECK: v0 = vmux(q[[Q1C0]],v0,v1)
-define <64 x i16> @test_1c(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
+define <64 x i16> @test_1c(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) {
   %q0 = icmp eq <64 x i16> %v0, %v1
   %q1 = trunc <64 x i16> %v2 to <64 x i1>
   %q2 = xor <64 x i1> %q0, %q1
@@ -319,7 +319,7 @@ define <64 x i16> @test_1c(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
 ; CHECK-LABEL: test_1d:
 ; CHECK: q[[Q1D0:[0-3]]] &= vcmp.gt(v0.h,v1.h)
 ; CHECK: v0 = vmux(q[[Q1D0]],v0,v1)
-define <64 x i16> @test_1d(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
+define <64 x i16> @test_1d(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) {
   %q0 = icmp sgt <64 x i16> %v0, %v1
   %q1 = trunc <64 x i16> %v2 to <64 x i1>
   %q2 = and <64 x i1> %q0, %q1
@@ -330,7 +330,7 @@ define <64 x i16> @test_1d(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
 ; CHECK-LABEL: test_1e:
 ; CHECK: q[[Q1E0:[0-3]]] |= vcmp.gt(v0.h,v1.h)
 ; CHECK: v0 = vmux(q[[Q1E0]],v0,v1)
-define <64 x i16> @test_1e(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
+define <64 x i16> @test_1e(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) {
   %q0 = icmp sgt <64 x i16> %v0, %v1
   %q1 = trunc <64 x i16> %v2 to <64 x i1>
   %q2 = or <64 x i1> %q0, %q1
@@ -341,7 +341,7 @@ define <64 x i16> @test_1e(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
 ; CHECK-LABEL: test_1f:
 ; CHECK: q[[Q1F0:[0-3]]] ^= vcmp.gt(v0.h,v1.h)
 ; CHECK: v0 = vmux(q[[Q1F0]],v0,v1)
-define <64 x i16> @test_1f(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
+define <64 x i16> @test_1f(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) {
   %q0 = icmp sgt <64 x i16> %v0, %v1
   %q1 = trunc <64 x i16> %v2 to <64 x i1>
   %q2 = xor <64 x i1> %q0, %q1
@@ -352,7 +352,7 @@ define <64 x i16> @test_1f(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
 ; CHECK-LABEL: test_1g:
 ; CHECK: q[[Q1G0:[0-3]]] &= vcmp.gt(v0.uh,v1.uh)
 ; CHECK: v0 = vmux(q[[Q1G0]],v0,v1)
-define <64 x i16> @test_1g(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
+define <64 x i16> @test_1g(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) {
   %q0 = icmp ugt <64 x i16> %v0, %v1
   %q1 = trunc <64 x i16> %v2 to <64 x i1>
   %q2 = and <64 x i1> %q0, %q1
@@ -363,7 +363,7 @@ define <64 x i16> @test_1g(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
 ; CHECK-LABEL: test_1h:
 ; CHECK: q[[Q1H0:[0-3]]] |= vcmp.gt(v0.uh,v1.uh)
 ; CHECK: v0 = vmux(q[[Q1H0]],v0,v1)
-define <64 x i16> @test_1h(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
+define <64 x i16> @test_1h(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) {
   %q0 = icmp ugt <64 x i16> %v0, %v1
   %q1 = trunc <64 x i16> %v2 to <64 x i1>
   %q2 = or <64 x i1> %q0, %q1
@@ -374,7 +374,7 @@ define <64 x i16> @test_1h(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
 ; CHECK-LABEL: test_1i:
 ; CHECK: q[[Q1I0:[0-3]]] ^= vcmp.gt(v0.uh,v1.uh)
 ; CHECK: v0 = vmux(q[[Q1I0]],v0,v1)
-define <64 x i16> @test_1i(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
+define <64 x i16> @test_1i(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) {
   %q0 = icmp ugt <64 x i16> %v0, %v1
   %q1 = trunc <64 x i16> %v2 to <64 x i1>
   %q2 = xor <64 x i1> %q0, %q1
@@ -387,7 +387,7 @@ define <64 x i16> @test_1i(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 {
 ; CHECK-LABEL: test_20:
 ; CHECK: q[[Q200:[0-3]]] = vcmp.eq(v0.w,v1.w)
 ; CHECK: v0 = vmux(q[[Q200]],v1,v2)
-define <32 x i32> @test_20(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
+define <32 x i32> @test_20(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) {
   %t0 = icmp eq <32 x i32> %v0, %v1
   %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v2
   ret <32 x i32> %t1
@@ -396,7 +396,7 @@ define <32 x i32> @test_20(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
 ; CHECK-LABEL: test_21:
 ; CHECK: q[[Q210:[0-3]]] = vcmp.eq(v0.w,v1.w)
 ; CHECK: v0 = vmux(q[[Q210]],v2,v1)
-define <32 x i32> @test_21(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
+define <32 x i32> @test_21(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) {
   %t0 = icmp ne <32 x i32> %v0, %v1
   %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v2
   ret <32 x i32> %t1
@@ -405,7 +405,7 @@ define <32 x i32> @test_21(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
 ; CHECK-LABEL: test_22:
 ; CHECK: q[[Q220:[0-3]]] = vcmp.gt(v1.w,v0.w)
 ; CHECK: v0 = vmux(q[[Q220]],v1,v2)
-define <32 x i32> @test_22(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
+define <32 x i32> @test_22(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) {
   %t0 = icmp slt <32 x i32> %v0, %v1
   %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v2
   ret <32 x i32> %t1
@@ -414,7 +414,7 @@ define <32 x i32> @test_22(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
 ; CHECK-LABEL: test_23:
 ; CHECK: q[[Q230:[0-3]]] = vcmp.gt(v0.w,v1.w)
 ; CHECK: v0 = vmux(q[[Q230]],v2,v1)
-define <32 x i32> @test_23(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
+define <32 x i32> @test_23(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) {
   %t0 = icmp sle <32 x i32> %v0, %v1
   %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v2
   ret <32 x i32> %t1
@@ -423,7 +423,7 @@ define <32 x i32> @test_23(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
 ; CHECK-LABEL: test_24:
 ; CHECK: q[[Q240:[0-3]]] = vcmp.gt(v0.w,v1.w)
 ; CHECK: v0 = vmux(q[[Q240]],v1,v2)
-define <32 x i32> @test_24(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
+define <32 x i32> @test_24(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) {
   %t0 = icmp sgt <32 x i32> %v0, %v1
   %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v2
   ret <32 x i32> %t1
@@ -432,7 +432,7 @@ define <32 x i32> @test_24(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
 ; CHECK-LABEL: test_25:
 ; CHECK: q[[Q250:[0-3]]] = vcmp.gt(v1.w,v0.w)
 ; CHECK: v0 = vmux(q[[Q250]],v2,v1)
-define <32 x i32> @test_25(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
+define <32 x i32> @test_25(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) {
   %t0 = icmp sge <32 x i32> %v0, %v1
   %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v2
   ret <32 x i32> %t1
@@ -441,7 +441,7 @@ define <32 x i32> @test_25(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
 ; CHECK-LABEL: test_26:
 ; CHECK: q[[Q260:[0-3]]] = vcmp.gt(v1.uw,v0.uw)
 ; CHECK: v0 = vmux(q[[Q260]],v1,v2)
-define <32 x i32> @test_26(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
+define <32 x i32> @test_26(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) {
   %t0 = icmp ult <32 x i32> %v0, %v1
   %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v2
   ret <32 x i32> %t1
@@ -450,7 +450,7 @@ define <32 x i32> @test_26(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
 ; CHECK-LABEL: test_27:
 ; CHECK: q[[Q270:[0-3]]] = vcmp.gt(v0.uw,v1.uw)
 ; CHECK: v0 = vmux(q[[Q270]],v2,v1)
-define <32 x i32> @test_27(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
+define <32 x i32> @test_27(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) {
   %t0 = icmp ule <32 x i32> %v0, %v1
   %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v2
   ret <32 x i32> %t1
@@ -459,7 +459,7 @@ define <32 x i32> @test_27(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
 ; CHECK-LABEL: test_28:
 ; CHECK: q[[Q280:[0-3]]] = vcmp.gt(v0.uw,v1.uw)
 ; CHECK: v0 = vmux(q[[Q280]],v1,v2)
-define <32 x i32> @test_28(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
+define <32 x i32> @test_28(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) {
   %t0 = icmp ugt <32 x i32> %v0, %v1
   %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v2
   ret <32 x i32> %t1
@@ -468,7 +468,7 @@ define <32 x i32> @test_28(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
 ; CHECK-LABEL: test_29:
 ; CHECK: q[[Q290:[0-3]]] = vcmp.gt(v1.uw,v0.uw)
 ; CHECK: v0 = vmux(q[[Q290]],v2,v1)
-define <32 x i32> @test_29(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
+define <32 x i32> @test_29(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) {
   %t0 = icmp uge <32 x i32> %v0, %v1
   %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v2
   ret <32 x i32> %t1
@@ -477,7 +477,7 @@ define <32 x i32> @test_29(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
 ; CHECK-LABEL: test_2a:
 ; CHECK: q[[Q2A0:[0-3]]] &= vcmp.eq(v0.w,v1.w)
 ; CHECK: v0 = vmux(q[[Q2A0]],v0,v1)
-define <32 x i32> @test_2a(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
+define <32 x i32> @test_2a(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) {
   %q0 = icmp eq <32 x i32> %v0, %v1
   %q1 = trunc <32 x i32> %v2 to <32 x i1>
   %q2 = and <32 x i1> %q0, %q1
@@ -488,7 +488,7 @@ define <32 x i32> @test_2a(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
 ; CHECK-LABEL: test_2b:
 ; CHECK: q[[Q2B0:[0-3]]] |= vcmp.eq(v0.w,v1.w)
 ; CHECK: v0 = vmux(q[[Q2B0]],v0,v1)
-define <32 x i32> @test_2b(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
+define <32 x i32> @test_2b(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) {
   %q0 = icmp eq <32 x i32> %v0, %v1
   %q1 = trunc <32 x i32> %v2 to <32 x i1>
   %q2 = or <32 x i1> %q0, %q1
@@ -499,7 +499,7 @@ define <32 x i32> @test_2b(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
 ; CHECK-LABEL: test_2c:
 ; CHECK: q[[Q2C0:[0-3]]] ^= vcmp.eq(v0.w,v1.w)
 ; CHECK: v0 = vmux(q[[Q2C0]],v0,v1)
-define <32 x i32> @test_2c(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
+define <32 x i32> @test_2c(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) {
   %q0 = icmp eq <32 x i32> %v0, %v1
   %q1 = trunc <32 x i32> %v2 to <32 x i1>
   %q2 = xor <32 x i1> %q0, %q1
@@ -510,7 +510,7 @@ define <32 x i32> @test_2c(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
 ; CHECK-LABEL: test_2d:
 ; CHECK: q[[Q2D0:[0-3]]] &= vcmp.gt(v0.w,v1.w)
 ; CHECK: v0 = vmux(q[[Q2D0]],v0,v1)
-define <32 x i32> @test_2d(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
+define <32 x i32> @test_2d(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) {
   %q0 = icmp sgt <32 x i32> %v0, %v1
   %q1 = trunc <32 x i32> %v2 to <32 x i1>
   %q2 = and <32 x i1> %q0, %q1
@@ -521,7 +521,7 @@ define <32 x i32> @test_2d(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
 ; CHECK-LABEL: test_2e:
 ; CHECK: q[[Q2E0:[0-3]]] |= vcmp.gt(v0.w,v1.w)
 ; CHECK: v0 = vmux(q[[Q2E0]],v0,v1)
-define <32 x i32> @test_2e(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
+define <32 x i32> @test_2e(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) {
   %q0 = icmp sgt <32 x i32> %v0, %v1
   %q1 = trunc <32 x i32> %v2 to <32 x i1>
   %q2 = or <32 x i1> %q0, %q1
@@ -532,7 +532,7 @@ define <32 x i32> @test_2e(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
 ; CHECK-LABEL: test_2f:
 ; CHECK: q[[Q2F0:[0-3]]] ^= vcmp.gt(v0.w,v1.w)
 ; CHECK: v0 = vmux(q[[Q2F0]],v0,v1)
-define <32 x i32> @test_2f(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
+define <32 x i32> @test_2f(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) {
   %q0 = icmp sgt <32 x i32> %v0, %v1
   %q1 = trunc <32 x i32> %v2 to <32 x i1>
   %q2 = xor <32 x i1> %q0, %q1
@@ -543,7 +543,7 @@ define <32 x i32> @test_2f(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
 ; CHECK-LABEL: test_2g:
 ; CHECK: q[[Q2G0:[0-3]]] &= vcmp.gt(v0.uw,v1.uw)
 ; CHECK: v0 = vmux(q[[Q2G0]],v0,v1)
-define <32 x i32> @test_2g(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
+define <32 x i32> @test_2g(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) {
   %q0 = icmp ugt <32 x i32> %v0, %v1
   %q1 = trunc <32 x i32> %v2 to <32 x i1>
   %q2 = and <32 x i1> %q0, %q1
@@ -554,7 +554,7 @@ define <32 x i32> @test_2g(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
 ; CHECK-LABEL: test_2h:
 ; CHECK: q[[Q2H0:[0-3]]] |= vcmp.gt(v0.uw,v1.uw)
 ; CHECK: v0 = vmux(q[[Q2H0]],v0,v1)
-define <32 x i32> @test_2h(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
+define <32 x i32> @test_2h(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) {
   %q0 = icmp ugt <32 x i32> %v0, %v1
   %q1 = trunc <32 x i32> %v2 to <32 x i1>
   %q2 = or <32 x i1> %q0, %q1
@@ -565,7 +565,7 @@ define <32 x i32> @test_2h(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
 ; CHECK-LABEL: test_2i:
 ; CHECK: q[[Q2I0:[0-3]]] ^= vcmp.gt(v0.uw,v1.uw)
 ; CHECK: v0 = vmux(q[[Q2I0]],v0,v1)
-define <32 x i32> @test_2i(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
+define <32 x i32> @test_2i(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) {
   %q0 = icmp ugt <32 x i32> %v0, %v1
   %q1 = trunc <32 x i32> %v2 to <32 x i1>
   %q2 = xor <32 x i1> %q0, %q1
@@ -573,4 +573,134 @@ define <32 x i32> @test_2i(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
   ret <32 x i32> %t1
 }
 
-attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" }
+; --- Float32
+
+; CHECK-LABEL: test_2j:
+; CHECK: q[[Q2J0:[0-3]]] = vcmp.eq(v0.w,v1.w)
+; CHECK: v0 = vmux(q[[Q2J0]],v0,v1)
+define <32 x float> @test_2j(<32 x float> %v0, <32 x float> %v1) {
+  %t0 = fcmp oeq <32 x float> %v0, %v1
+  %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
+  ret <32 x float> %t1
+}
+
+; CHECK-LABEL: test_2k:
+; CHECK: q[[Q2K0:[0-3]]] = vcmp.eq(v0.w,v1.w)
+; CHECK: v0 = vmux(q[[Q2K0]],v1,v0)
+define <32 x float> @test_2k(<32 x float> %v0, <32 x float> %v1) {
+  %t0 = fcmp one <32 x float> %v0, %v1
+  %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
+  ret <32 x float> %t1
+}
+
+; CHECK-LABEL: test_2l:
+; CHECK: v0.sf = vmin(v1.sf,v0.sf)
+define <32 x float> @test_2l(<32 x float> %v0, <32 x float> %v1) {
+  %t0 = fcmp olt <32 x float> %v0, %v1
+  %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
+  ret <32 x float> %t1
+}
+
+; CHECK-LABEL: test_2m:
+; CHECK: q[[Q2M0:[0-3]]] = vcmp.gt(v0.sf,v1.sf)
+; CHECK: v0 = vmux(q[[Q2M0]],v1,v0)
+define <32 x float> @test_2m(<32 x float> %v0, <32 x float> %v1) {
+  %t0 = fcmp ole <32 x float> %v0, %v1
+  %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
+  ret <32 x float> %t1
+}
+
+; CHECK-LABEL: test_2n:
+; CHECK: v0.sf = vmax(v0.sf,v1.sf)
+define <32 x float> @test_2n(<32 x float> %v0, <32 x float> %v1) {
+  %t0 = fcmp ogt <32 x float> %v0, %v1
+  %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
+  ret <32 x float> %t1
+}
+
+; CHECK-LABEL: test_2o:
+; CHECK: q[[Q2O0:[0-3]]] = vcmp.gt(v1.sf,v0.sf)
+; CHECK: v0 = vmux(q[[Q2O0]],v1,v0)
+define <32 x float> @test_2o(<32 x float> %v0, <32 x float> %v1) {
+  %t0 = fcmp oge <32 x float> %v0, %v1
+  %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
+  ret <32 x float> %t1
+}
+
+; CHECK-LABEL: test_2p:
+; CHECK: r[[R2P0:[0-9]*]] = ##16843009
+; CHECK: q[[Q2P1:[0-3]]] = vand(v2,r[[R2P0]])
+; CHECK: q[[Q2P1:[0-3]]] &= vcmp.eq(v0.w,v1.w)
+; CHECK: v0 = vmux(q[[Q2P1]],v0,v1)
+define <32 x float> @test_2p(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) {
+  %q0 = fcmp oeq <32 x float> %v0, %v1
+  %q1 = trunc <32 x i32> %v2 to <32 x i1>
+  %q2 = and <32 x i1> %q0, %q1
+  %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
+  ret <32 x float> %t1
+}
+
+; CHECK-LABEL: test_2q:
+; CHECK: r[[R2Q0:[0-9]*]] = ##16843009
+; CHECK: q[[Q2Q1:[0-3]]] = vand(v2,r[[R2Q0]])
+; CHECK: q[[Q2Q1:[0-3]]] |= vcmp.eq(v0.w,v1.w)
+; CHECK: v0 = vmux(q[[Q2Q1]],v0,v1)
+define <32 x float> @test_2q(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) {
+  %q0 = fcmp oeq <32 x float> %v0, %v1
+  %q1 = trunc <32 x i32> %v2 to <32 x i1>
+  %q2 = or <32 x i1> %q0, %q1
+  %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
+  ret <32 x float> %t1
+}
+
+; CHECK-LABEL: test_2r:
+; CHECK: r[[R2R0:[0-9]*]] = ##16843009
+; CHECK: q[[Q2R1:[0-3]]] = vand(v2,r[[R2R0]])
+; CHECK: q[[Q2R1:[0-3]]] ^= vcmp.eq(v0.w,v1.w)
+; CHECK: v0 = vmux(q[[Q2R1]],v0,v1)
+define <32 x float> @test_2r(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) {
+  %q0 = fcmp oeq <32 x float> %v0, %v1
+  %q1 = trunc <32 x i32> %v2 to <32 x i1>
+  %q2 = xor <32 x i1> %q0, %q1
+  %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
+  ret <32 x float> %t1
+}
+
+; CHECK-LABEL: test_2s:
+; CHECK: r[[R2S0:[0-9]*]] = ##16843009
+; CHECK: q[[Q2S1:[0-3]]] = vand(v2,r[[R2S0]])
+; CHECK: q[[Q2S1:[0-3]]] &= vcmp.gt(v0.sf,v1.sf)
+; CHECK: v0 = vmux(q[[Q2S1]],v0,v1)
+define <32 x float> @test_2s(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) {
+  %q0 = fcmp ogt <32 x float> %v0, %v1
+  %q1 = trunc <32 x i32> %v2 to <32 x i1>
+  %q2 = and <32 x i1> %q0, %q1
+  %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
+  ret <32 x float> %t1
+}
+
+; CHECK-LABEL: test_2t:
+; CHECK: r[[R2T0:[0-9]*]] = ##16843009
+; CHECK: q[[Q2T1:[0-3]]] = vand(v2,r[[R2T0]])
+; CHECK: q[[Q2T1:[0-3]]] |= vcmp.gt(v0.sf,v1.sf)
+; CHECK: v0 = vmux(q[[Q2T1]],v0,v1)
+define <32 x float> @test_2t(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) {
+  %q0 = fcmp ogt <32 x float> %v0, %v1
+  %q1 = trunc <32 x i32> %v2 to <32 x i1>
+  %q2 = or <32 x i1> %q0, %q1
+  %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
+  ret <32 x float> %t1
+}
+
+; CHECK-LABEL: test_2u:
+; CHECK: r[[R2U0:[0-9]*]] = ##16843009
+; CHECK: q[[Q2U1:[0-3]]] = vand(v2,r[[R2U0]])
+; CHECK: q[[Q2U1:[0-3]]] ^= vcmp.gt(v0.sf,v1.sf)
+; CHECK: v0 = vmux(q[[Q2U1]],v0,v1)
+define <32 x float> @test_2u(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) {
+  %q0 = fcmp ogt <32 x float> %v0, %v1
+  %q1 = trunc <32 x i32> %v2 to <32 x i1>
+  %q2 = xor <32 x i1> %q0, %q1
+  %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
+  ret <32 x float> %t1
+}
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-64b.ll b/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-64b.ll
index 7673f8b12264f..52176d6d2158c 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-64b.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-64b.ll
@@ -574,4 +574,4 @@ define <16 x i32> @test_2i(<16 x i32> %v0, <16 x i32> %v1, <16 x i32> %v2) #0 {
   ret <16 x i32> %t1
 }
 
-attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" }
+attributes #0 = { nounwind readnone "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length64b" }
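
For completeness, a sketch (not part of the patch) of the mixed-signedness
multiply-accumulate that the new V6_vmpybusv_acc pattern is intended to
match; the function name is a placeholder. Codegen can be inspected with,
e.g., llc -mtriple=hexagon -mattr=+hvxv73,+hvx-length128b, as in the updated
vector-compare-128b.ll RUN line:

  define <128 x i16> @mpybus_acc(<128 x i16> %acc, <128 x i8> %a, <128 x i8> %b) {
    %ua = zext <128 x i8> %a to <128 x i16>   ; unsigned multiply operand
    %sb = sext <128 x i8> %b to <128 x i16>   ; signed multiply operand
    %m = mul <128 x i16> %ua, %sb
    %r = add <128 x i16> %acc, %m
    ret <128 x i16> %r
  }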


