[llvm] ea6693d - [Hexagon] Add missing patterns for mulhs/mulhu
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 18 08:14:10 PST 2022
Author: Krzysztof Parzyszek
Date: 2022-11-18T08:13:57-08:00
New Revision: ea6693d4c840272b5f9b83518b474ed7b2449744
URL: https://github.com/llvm/llvm-project/commit/ea6693d4c840272b5f9b83518b474ed7b2449744
DIFF: https://github.com/llvm/llvm-project/commit/ea6693d4c840272b5f9b83518b474ed7b2449744.diff
LOG: [Hexagon] Add missing patterns for mulhs/mulhu
Added:
llvm/test/CodeGen/Hexagon/isel/mulh-scalar.ll
Modified:
llvm/lib/Target/Hexagon/HexagonPatterns.td
Removed:
################################################################################
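
For context: mulhs/mulhu are the generic SelectionDAG nodes for the high half of a widening multiply, i.e. the sext/zext + mul + lshr + trunc idiom exercised by the tests added below. A per-element scalar model of the previously unhandled v4i8 unsigned case, as a minimal C sketch (the function name is illustrative only, not part of the patch):

    #include <stdint.h>

    /* Model of v4i8 mulhu: widen each byte, multiply, keep the high byte of
       each 16-bit product.  With the new Mulhub4 fragment this selects to
       M5_vmpybuu followed by S2_vtrunohb, which packs the high (odd) byte of
       each of the four 16-bit products. */
    static void mulhu_v4i8(const uint8_t a[4], const uint8_t b[4],
                           uint8_t out[4]) {
      for (int i = 0; i < 4; ++i)
        out[i] = (uint8_t)(((uint16_t)a[i] * b[i]) >> 8);
    }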
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index bf6303fd165e9..ac4a352548bf9 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -1546,47 +1546,63 @@ def: Pat<(v2i32 (mulhu V2I32:$Rss, V2I32:$Rtt)),
(Combinew (M2_mpyu_up (HiReg $Rss), (HiReg $Rtt)),
(M2_mpyu_up (LoReg $Rss), (LoReg $Rtt)))>;
-def: Pat<(v2i32 (mulhs V2I32:$Rs, V2I32:$Rt)),
- (Combinew (M2_mpy_up (HiReg $Rs), (HiReg $Rt)),
- (M2_mpy_up (LoReg $Rt), (LoReg $Rt)))>;
+def: Pat<(v2i32 (mulhs V2I32:$Rss, V2I32:$Rtt)),
+ (Combinew (M2_mpy_up (HiReg $Rss), (HiReg $Rtt)),
+ (M2_mpy_up (LoReg $Rss), (LoReg $Rtt)))>;
-def Mulhub:
+def Mulhub4:
+ OutPatFrag<(ops node:$Rs, node:$Rt), (S2_vtrunohb (M5_vmpybuu $Rs, $Rt))>;
+def Mulhub8:
OutPatFrag<(ops node:$Rss, node:$Rtt),
- (Combinew (S2_vtrunohb (M5_vmpybuu (HiReg $Rss), (HiReg $Rtt))),
- (S2_vtrunohb (M5_vmpybuu (LoReg $Rss), (LoReg $Rtt))))>;
+ (Combinew (Mulhub4 (HiReg $Rss), (HiReg $Rtt)),
+ (Mulhub4 (LoReg $Rss), (LoReg $Rtt)))>;
-// Equivalent of byte-wise arithmetic shift right by 7 in v8i8.
-def Asr7:
- OutPatFrag<(ops node:$Rss), (C2_mask (C2_not (A4_vcmpbgti $Rss, 0)))>;
+// (mux (x >= 0), 0, y)
+def Negbytes8:
+ OutPatFrag<(ops node:$Rss, node:$Rtt),
+ (C2_vmux (A4_vcmpbgti $Rss, -1), (A2_tfrpi 0), $Rtt)>;
+
+def: Pat<(v4i8 (mulhu V4I8:$Rs, V4I8:$Rt)), (Mulhub4 $Rs, $Rt)>;
+def: Pat<(v8i8 (mulhu V8I8:$Rss, V8I8:$Rtt)), (Mulhub8 $Rss, $Rtt)>;
-def: Pat<(v8i8 (mulhu V8I8:$Rss, V8I8:$Rtt)),
- (Mulhub $Rss, $Rtt)>;
+// (Mulhs x, y) = (Mulhu x, y) - (x < 0 ? y : 0) - (y < 0 ? x : 0)
+def Mulhsb8:
+ OutPatFrag<(ops node:$Rss, node:$Rtt),
+ (A2_vsubub (Mulhub8 $Rss, $Rtt),
+ (A2_vaddub (Negbytes8 $Rss, $Rtt),
+ (Negbytes8 $Rtt, $Rss)))>;
-def: Pat<(v8i8 (mulhs V8I8:$Rss, V8I8:$Rtt)),
- (A2_vsubub
- (Mulhub $Rss, $Rtt),
- (A2_vaddub (A2_andp V8I8:$Rss, (Asr7 $Rtt)),
- (A2_andp V8I8:$Rtt, (Asr7 $Rss))))>;
+def: Pat<(v4i8 (mulhs V4I8:$Rs, V4I8:$Rt)),
+ (LoReg (Mulhsb8 (v8i8 (ToAext64 $Rs)), (v8i8 (ToAext64 $Rt))))>;
+def: Pat<(v8i8 (mulhs V8I8:$Rss, V8I8:$Rtt)), (Mulhsb8 $Rss, $Rtt)>;
-def Mpysh:
+// v2i16 *s v2i16 -> v2i32
+def Muli16:
OutPatFrag<(ops node:$Rs, node:$Rt), (M2_vmpy2s_s0 $Rs, $Rt)>;
-def Mpyshh:
- OutPatFrag<(ops node:$Rss, node:$Rtt), (Mpysh (HiReg $Rss), (HiReg $Rtt))>;
-def Mpyshl:
- OutPatFrag<(ops node:$Rss, node:$Rtt), (Mpysh (LoReg $Rss), (LoReg $Rtt))>;
-def Mulhsh:
+def Mulhsh2:
+ OutPatFrag<(ops node:$Rs, node:$Rt),
+ (A2_combine_hh (HiReg (Muli16 $Rs, $Rt)),
+ (LoReg (Muli16 $Rs, $Rt)))>;
+def Mulhsh4:
OutPatFrag<(ops node:$Rss, node:$Rtt),
- (Combinew (A2_combine_hh (HiReg (Mpyshh $Rss, $Rtt)),
- (LoReg (Mpyshh $Rss, $Rtt))),
- (A2_combine_hh (HiReg (Mpyshl $Rss, $Rtt)),
- (LoReg (Mpyshl $Rss, $Rtt))))>;
+ (Combinew (Mulhsh2 (HiReg $Rss), (HiReg $Rtt)),
+ (Mulhsh2 (LoReg $Rss), (LoReg $Rtt)))>;
+
+def: Pat<(v2i16 (mulhs V2I16:$Rs, V2I16:$Rt)), (Mulhsh2 $Rs, $Rt)>;
+def: Pat<(v4i16 (mulhs V4I16:$Rss, V4I16:$Rtt)), (Mulhsh4 $Rss, $Rtt)>;
-def: Pat<(v4i16 (mulhs V4I16:$Rss, V4I16:$Rtt)), (Mulhsh $Rss, $Rtt)>;
+def: Pat<(v2i16 (mulhu V2I16:$Rs, V2I16:$Rt)),
+ (A2_svaddh
+ (Mulhsh2 $Rs, $Rt),
+ (A2_svaddh (LoReg (A2_andp (Combinew $Rt, $Rs),
+ (S2_asr_i_vh (Combinew $Rs, $Rt), 15))),
+ (HiReg (A2_andp (Combinew $Rt, $Rs),
+ (S2_asr_i_vh (Combinew $Rs, $Rt), 15)))))>;
def: Pat<(v4i16 (mulhu V4I16:$Rss, V4I16:$Rtt)),
(A2_vaddh
- (Mulhsh $Rss, $Rtt),
+ (Mulhsh4 $Rss, $Rtt),
(A2_vaddh (A2_andp V4I16:$Rss, (S2_asr_i_vh $Rtt, 15)),
(A2_andp V4I16:$Rtt, (S2_asr_i_vh $Rss, 15))))>;
@@ -1783,7 +1799,7 @@ def: Pat<(sub V4I8:$Rs, V4I8:$Rt),
// half-words, and saturates the result to a 32-bit value, except the
// saturation never happens (it can only occur with scaling).
def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)),
- (LoReg (S2_vtrunewh (A2_combineii 0, 0),
+ (LoReg (S2_vtrunewh (IMPLICIT_DEF),
(M2_vmpy2s_s0 V2I16:$Rs, V2I16:$Rt)))>;
def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
(S2_vtrunewh (M2_vmpy2s_s0 (HiReg $Rs), (HiReg $Rt)),
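
The signed-from-unsigned rewrite in the Mulhsb8 fragment follows the comment in the patch: mulhs(x, y) = mulhu(x, y) - (x < 0 ? y : 0) - (y < 0 ? x : 0), with the byte arithmetic wrapping mod 2^8 (A2_vsubub/A2_vaddub are modular). The v2i16/v4i16 mulhu patterns use the converse fixup, adding (x & (y >> 15)) + (y & (x >> 15)) to the signed high half. An exhaustive, standalone check of both identities, as a plain C sketch (not part of the patch; names are illustrative):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      /* Byte identity used by Mulhsb8:
         mulhs(x,y) == mulhu(x,y) - (x<0 ? y : 0) - (y<0 ? x : 0)  (mod 2^8). */
      for (int x = 0; x < 256; ++x) {
        for (int y = 0; y < 256; ++y) {
          uint8_t xu = (uint8_t)x, yu = (uint8_t)y;
          int8_t xs = (int8_t)xu, ys = (int8_t)yu;
          uint8_t hu = (uint8_t)(((unsigned)xu * yu) >> 8);
          /* High byte of the signed product, taken from the two's-complement
             bit pattern to avoid right-shifting a negative value. */
          uint8_t hs = (uint8_t)(((unsigned)(xs * ys)) >> 8);
          uint8_t fix = (uint8_t)((xs < 0 ? yu : 0) + (ys < 0 ? xu : 0));
          assert((uint8_t)(hu - fix) == hs);
        }
      }

      /* Halfword identity used by the v2i16/v4i16 mulhu patterns:
         mulhu(x,y) == mulhs(x,y) + (x & (y>>15)) + (y & (x>>15))  (mod 2^16),
         where >> is an arithmetic shift, so the masks select y if x < 0 and
         x if y < 0. */
      for (int x = 0; x < 65536; ++x) {
        for (int y = 0; y < 65536; ++y) {
          uint16_t xu = (uint16_t)x, yu = (uint16_t)y;
          int16_t xs = (int16_t)xu, ys = (int16_t)yu;
          uint16_t hu = (uint16_t)(((uint32_t)xu * yu) >> 16);
          uint16_t hs = (uint16_t)(((uint32_t)(xs * ys)) >> 16);
          uint16_t fix = (uint16_t)((xs < 0 ? yu : 0) + (ys < 0 ? xu : 0));
          assert((uint16_t)(hs + fix) == hu);
        }
      }

      printf("mulh identities hold for all 8-bit and 16-bit pairs\n");
      return 0;
    }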
diff --git a/llvm/test/CodeGen/Hexagon/isel/mulh-scalar.ll b/llvm/test/CodeGen/Hexagon/isel/mulh-scalar.ll
new file mode 100644
index 0000000000000..3364a3c6dbe22
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/isel/mulh-scalar.ll
@@ -0,0 +1,315 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+define <4 x i8> @f0(<4 x i8> %a0, <4 x i8> %a1) #0 {
+; CHECK-LABEL: f0:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r7:6 = combine(#0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5:4 = vmpybu(r0,r1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p1 = vcmpb.gt(r1:0,#-1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = vcmpb.gt(r3:2,#-1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r3:2 = vmux(p1,r7:6,r3:2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vmux(p0,r7:6,r1:0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r4 = vtrunohb(r5:4)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r7:6 = vmpybu(r0,r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vaddub(r3:2,r1:0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 = vtrunohb(r7:6)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vsubub(r5:4,r1:0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = sext <4 x i8> %a0 to <4 x i16>
+ %v1 = sext <4 x i8> %a1 to <4 x i16>
+ %v2 = mul <4 x i16> %v0, %v1
+ %v3 = lshr <4 x i16> %v2, <i16 8, i16 8, i16 8, i16 8>
+ %v4 = trunc <4 x i16> %v3 to <4 x i8>
+ ret <4 x i8> %v4
+}
+
+define <4 x i8> @f1(<4 x i8> %a0, <4 x i8> %a1) #0 {
+; CHECK-LABEL: f1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vmpybu(r0,r1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = vtrunohb(r1:0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = zext <4 x i8> %a0 to <4 x i16>
+ %v1 = zext <4 x i8> %a1 to <4 x i16>
+ %v2 = mul <4 x i16> %v0, %v1
+ %v3 = lshr <4 x i16> %v2, <i16 8, i16 8, i16 8, i16 8>
+ %v4 = trunc <4 x i16> %v3 to <4 x i8>
+ ret <4 x i8> %v4
+}
+
+define <8 x i8> @f2(<8 x i8> %a0, <8 x i8> %a1) #0 {
+; CHECK-LABEL: f2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r7:6 = combine(#0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = vcmpb.gt(r3:2,#-1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5:4 = vmpybu(r0,r2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r9:8 = vmux(p0,r7:6,r1:0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = vcmpb.gt(r1:0,#-1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vmpybu(r1,r3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r7:6 = vmux(p0,r7:6,r3:2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r4 = vtrunohb(r5:4)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r3:2 = vaddub(r7:6,r9:8)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 = vtrunohb(r1:0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vsubub(r5:4,r3:2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = sext <8 x i8> %a0 to <8 x i16>
+ %v1 = sext <8 x i8> %a1 to <8 x i16>
+ %v2 = mul <8 x i16> %v0, %v1
+ %v3 = lshr <8 x i16> %v2, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %v4 = trunc <8 x i16> %v3 to <8 x i8>
+ ret <8 x i8> %v4
+}
+
+define <8 x i8> @f3(<8 x i8> %a0, <8 x i8> %a1) #0 {
+; CHECK-LABEL: f3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r5:4 = vmpybu(r0,r2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r7:6 = vmpybu(r1,r3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = vtrunohb(r5:4)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = vtrunohb(r7:6)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = zext <8 x i8> %a0 to <8 x i16>
+ %v1 = zext <8 x i8> %a1 to <8 x i16>
+ %v2 = mul <8 x i16> %v0, %v1
+ %v3 = lshr <8 x i16> %v2, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %v4 = trunc <8 x i16> %v3 to <8 x i8>
+ ret <8 x i8> %v4
+}
+
+define <2 x i16> @f4(<2 x i16> %a0, <2 x i16> %a1) #0 {
+; CHECK-LABEL: f4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vmpyh(r0,r1):sat
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = combine(r1.h,r0.h)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = sext <2 x i16> %a0 to <2 x i32>
+ %v1 = sext <2 x i16> %a1 to <2 x i32>
+ %v2 = mul <2 x i32> %v0, %v1
+ %v3 = lshr <2 x i32> %v2, <i32 16, i32 16>
+ %v4 = trunc <2 x i32> %v3 to <2 x i16>
+ ret <2 x i16> %v4
+}
+
+define <2 x i16> @f5(<2 x i16> %a0, <2 x i16> %a1) #0 {
+; CHECK-LABEL: f5:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r3:2 = combine(r0,r1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vasrh(r3:2,#15)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5:4 = vmpyh(r3,r2):sat
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = and(r3,r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = and(r2,r1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r4 = combine(r5.h,r4.h)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = vaddh(r0,r1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = vaddh(r4,r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = zext <2 x i16> %a0 to <2 x i32>
+ %v1 = zext <2 x i16> %a1 to <2 x i32>
+ %v2 = mul <2 x i32> %v0, %v1
+ %v3 = lshr <2 x i32> %v2, <i32 16, i32 16>
+ %v4 = trunc <2 x i32> %v3 to <2 x i16>
+ ret <2 x i16> %v4
+}
+
+define <4 x i16> @f6(<4 x i16> %a0, <4 x i16> %a1) #0 {
+; CHECK-LABEL: f6:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r5:4 = vmpyh(r0,r2):sat
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r7:6 = vmpyh(r1,r3):sat
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = combine(r5.h,r4.h)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = combine(r7.h,r6.h)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = sext <4 x i16> %a0 to <4 x i32>
+ %v1 = sext <4 x i16> %a1 to <4 x i32>
+ %v2 = mul <4 x i32> %v0, %v1
+ %v3 = lshr <4 x i32> %v2, <i32 16, i32 16, i32 16, i32 16>
+ %v4 = trunc <4 x i32> %v3 to <4 x i16>
+ ret <4 x i16> %v4
+}
+
+define <4 x i16> @f7(<4 x i16> %a0, <4 x i16> %a1) #0 {
+; CHECK-LABEL: f7:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r7:6 = vasrh(r1:0,#15)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r9:8 = vasrh(r3:2,#15)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5:4 = vmpyh(r0,r2):sat
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r7:6 = and(r3:2,r7:6)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r3:2 = vmpyh(r1,r3):sat
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = and(r1:0,r9:8)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r4 = combine(r5.h,r4.h)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 = combine(r3.h,r2.h)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vaddh(r1:0,r7:6)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vaddh(r5:4,r1:0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = zext <4 x i16> %a0 to <4 x i32>
+ %v1 = zext <4 x i16> %a1 to <4 x i32>
+ %v2 = mul <4 x i32> %v0, %v1
+ %v3 = lshr <4 x i32> %v2, <i32 16, i32 16, i32 16, i32 16>
+ %v4 = trunc <4 x i32> %v3 to <4 x i16>
+ ret <4 x i16> %v4
+}
+
+define <2 x i32> @f8(<2 x i32> %a0, <2 x i32> %a1) #0 {
+; CHECK-LABEL: f8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = mpy(r0,r2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = mpy(r1,r3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = sext <2 x i32> %a0 to <2 x i64>
+ %v1 = sext <2 x i32> %a1 to <2 x i64>
+ %v2 = mul <2 x i64> %v0, %v1
+ %v3 = lshr <2 x i64> %v2, <i64 32, i64 32>
+ %v4 = trunc <2 x i64> %v3 to <2 x i32>
+ ret <2 x i32> %v4
+}
+
+define <2 x i32> @f9(<2 x i32> %a0, <2 x i32> %a1) #0 {
+; CHECK-LABEL: f9:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = mpyu(r0,r2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = mpyu(r1,r3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = zext <2 x i32> %a0 to <2 x i64>
+ %v1 = zext <2 x i32> %a1 to <2 x i64>
+ %v2 = mul <2 x i64> %v0, %v1
+ %v3 = lshr <2 x i64> %v2, <i64 32, i64 32>
+ %v4 = trunc <2 x i64> %v3 to <2 x i32>
+ ret <2 x i32> %v4
+}
+
+attributes #0 = { nounwind memory(none) "target-features"="-packets" }