[llvm] f8cb97a - [Hexagon] Fix deinterleaving after vmpyh
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 23 13:32:12 PST 2022
Author: Krzysztof Parzyszek
Date: 2022-11-23T13:31:54-08:00
New Revision: f8cb97a0f1a4fed7b0347f127ffe33841cf02134
URL: https://github.com/llvm/llvm-project/commit/f8cb97a0f1a4fed7b0347f127ffe33841cf02134
DIFF: https://github.com/llvm/llvm-project/commit/f8cb97a0f1a4fed7b0347f127ffe33841cf02134.diff
LOG: [Hexagon] Fix deinterleaving after vmpyh
Added:
Modified:
llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
llvm/test/CodeGen/Hexagon/autohvx/mulh.ll
llvm/test/CodeGen/Hexagon/autohvx/qmul.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index 144555ed55e3..fe9fd14d0f0b 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -1586,7 +1586,7 @@ auto HvxIdioms::createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const
Value *P =
HVC.createHvxIntrinsic(Builder, V6_vmpyh, HvxP32Ty, {Y.Val, X.Val});
// Deinterleave
- return HVC.vdeal(Builder, HVC.sublo(Builder, P), HVC.subhi(Builder, P));
+ return HVC.vshuff(Builder, HVC.sublo(Builder, P), HVC.subhi(Builder, P));
}
auto HvxIdioms::createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/mulh.ll b/llvm/test/CodeGen/Hexagon/autohvx/mulh.ll
index a4a418f0d94e..5009adf8e239 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/mulh.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/mulh.ll
@@ -10,13 +10,28 @@ define <64 x i16> @mulhs16(<64 x i16> %a0, <64 x i16> %a1) #0 {
; V60-NEXT: v1:0.w = vmpy(v1.h,v0.h)
; V60-NEXT: }
; V60-NEXT: {
-; V60-NEXT: r7 = #-4
+; V60-NEXT: r7:6 = combine(#64,#68)
; V60-NEXT: }
; V60-NEXT: {
-; V60-NEXT: v1:0 = vdeal(v1,v0,r7)
+; V60-NEXT: r5 = #120
; V60-NEXT: }
; V60-NEXT: {
-; V60-NEXT: v0.h = vpacko(v1.w,v0.w)
+; V60-NEXT: v1:0 = vshuff(v1,v0,r7)
+; V60-NEXT: }
+; V60-NEXT: {
+; V60-NEXT: v3:2 = vdeal(v0,v0,r6)
+; V60-NEXT: }
+; V60-NEXT: {
+; V60-NEXT: v31:30 = vdeal(v0,v1,r6)
+; V60-NEXT: }
+; V60-NEXT: {
+; V60-NEXT: v3:2 = vshuff(v3,v2,r5)
+; V60-NEXT: }
+; V60-NEXT: {
+; V60-NEXT: v1:0 = vshuff(v31,v30,r5)
+; V60-NEXT: }
+; V60-NEXT: {
+; V60-NEXT: v0.h = vpacko(v0.w,v2.w)
; V60-NEXT: }
; V60-NEXT: {
; V60-NEXT: jumpr r31
@@ -28,13 +43,28 @@ define <64 x i16> @mulhs16(<64 x i16> %a0, <64 x i16> %a1) #0 {
; V65-NEXT: v1:0.w = vmpy(v1.h,v0.h)
; V65-NEXT: }
; V65-NEXT: {
-; V65-NEXT: r7 = #-4
+; V65-NEXT: r7:6 = combine(#64,#68)
; V65-NEXT: }
; V65-NEXT: {
-; V65-NEXT: v1:0 = vdeal(v1,v0,r7)
+; V65-NEXT: r5 = #120
; V65-NEXT: }
; V65-NEXT: {
-; V65-NEXT: v0.h = vpacko(v1.w,v0.w)
+; V65-NEXT: v1:0 = vshuff(v1,v0,r7)
+; V65-NEXT: }
+; V65-NEXT: {
+; V65-NEXT: v3:2 = vdeal(v0,v0,r6)
+; V65-NEXT: }
+; V65-NEXT: {
+; V65-NEXT: v31:30 = vdeal(v0,v1,r6)
+; V65-NEXT: }
+; V65-NEXT: {
+; V65-NEXT: v3:2 = vshuff(v3,v2,r5)
+; V65-NEXT: }
+; V65-NEXT: {
+; V65-NEXT: v1:0 = vshuff(v31,v30,r5)
+; V65-NEXT: }
+; V65-NEXT: {
+; V65-NEXT: v0.h = vpacko(v0.w,v2.w)
; V65-NEXT: }
; V65-NEXT: {
; V65-NEXT: jumpr r31
@@ -46,13 +76,28 @@ define <64 x i16> @mulhs16(<64 x i16> %a0, <64 x i16> %a1) #0 {
; V69-NEXT: v1:0.w = vmpy(v1.h,v0.h)
; V69-NEXT: }
; V69-NEXT: {
-; V69-NEXT: r7 = #-4
+; V69-NEXT: r7:6 = combine(#64,#68)
+; V69-NEXT: }
+; V69-NEXT: {
+; V69-NEXT: r5 = #120
+; V69-NEXT: }
+; V69-NEXT: {
+; V69-NEXT: v1:0 = vshuff(v1,v0,r7)
+; V69-NEXT: }
+; V69-NEXT: {
+; V69-NEXT: v3:2 = vdeal(v0,v0,r6)
+; V69-NEXT: }
+; V69-NEXT: {
+; V69-NEXT: v31:30 = vdeal(v0,v1,r6)
; V69-NEXT: }
; V69-NEXT: {
-; V69-NEXT: v1:0 = vdeal(v1,v0,r7)
+; V69-NEXT: v3:2 = vshuff(v3,v2,r5)
; V69-NEXT: }
; V69-NEXT: {
-; V69-NEXT: v0.h = vpacko(v1.w,v0.w)
+; V69-NEXT: v1:0 = vshuff(v31,v30,r5)
+; V69-NEXT: }
+; V69-NEXT: {
+; V69-NEXT: v0.h = vpacko(v0.w,v2.w)
; V69-NEXT: }
; V69-NEXT: {
; V69-NEXT: jumpr r31
@@ -72,13 +117,28 @@ define <64 x i16> @mulhu16(<64 x i16> %a0, <64 x i16> %a1) #0 {
; V60-NEXT: v1:0.uw = vmpy(v1.uh,v0.uh)
; V60-NEXT: }
; V60-NEXT: {
-; V60-NEXT: r7 = #-4
+; V60-NEXT: r7:6 = combine(#64,#68)
+; V60-NEXT: }
+; V60-NEXT: {
+; V60-NEXT: r5 = #120
+; V60-NEXT: }
+; V60-NEXT: {
+; V60-NEXT: v1:0 = vshuff(v1,v0,r7)
+; V60-NEXT: }
+; V60-NEXT: {
+; V60-NEXT: v3:2 = vdeal(v0,v0,r6)
+; V60-NEXT: }
+; V60-NEXT: {
+; V60-NEXT: v31:30 = vdeal(v0,v1,r6)
; V60-NEXT: }
; V60-NEXT: {
-; V60-NEXT: v1:0 = vdeal(v1,v0,r7)
+; V60-NEXT: v3:2 = vshuff(v3,v2,r5)
; V60-NEXT: }
; V60-NEXT: {
-; V60-NEXT: v0.h = vpacko(v1.w,v0.w)
+; V60-NEXT: v1:0 = vshuff(v31,v30,r5)
+; V60-NEXT: }
+; V60-NEXT: {
+; V60-NEXT: v0.h = vpacko(v0.w,v2.w)
; V60-NEXT: }
; V60-NEXT: {
; V60-NEXT: jumpr r31
@@ -90,13 +150,28 @@ define <64 x i16> @mulhu16(<64 x i16> %a0, <64 x i16> %a1) #0 {
; V65-NEXT: v1:0.uw = vmpy(v1.uh,v0.uh)
; V65-NEXT: }
; V65-NEXT: {
-; V65-NEXT: r7 = #-4
+; V65-NEXT: r7:6 = combine(#64,#68)
+; V65-NEXT: }
+; V65-NEXT: {
+; V65-NEXT: r5 = #120
+; V65-NEXT: }
+; V65-NEXT: {
+; V65-NEXT: v1:0 = vshuff(v1,v0,r7)
+; V65-NEXT: }
+; V65-NEXT: {
+; V65-NEXT: v3:2 = vdeal(v0,v0,r6)
+; V65-NEXT: }
+; V65-NEXT: {
+; V65-NEXT: v31:30 = vdeal(v0,v1,r6)
+; V65-NEXT: }
+; V65-NEXT: {
+; V65-NEXT: v3:2 = vshuff(v3,v2,r5)
; V65-NEXT: }
; V65-NEXT: {
-; V65-NEXT: v1:0 = vdeal(v1,v0,r7)
+; V65-NEXT: v1:0 = vshuff(v31,v30,r5)
; V65-NEXT: }
; V65-NEXT: {
-; V65-NEXT: v0.h = vpacko(v1.w,v0.w)
+; V65-NEXT: v0.h = vpacko(v0.w,v2.w)
; V65-NEXT: }
; V65-NEXT: {
; V65-NEXT: jumpr r31
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/qmul.ll b/llvm/test/CodeGen/Hexagon/autohvx/qmul.ll
index 6760875ec0df..13634ed445ac 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/qmul.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/qmul.ll
@@ -75,7 +75,10 @@ define void @f2(ptr %a0, ptr %a1, ptr %a2) #0 {
; CHECK-NEXT: v0 = vmem(r1+#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: r7 = #-4
+; CHECK-NEXT: r7 = #64
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5:4 = combine(#68,#120)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r3 = #15
@@ -87,16 +90,28 @@ define void @f2(ptr %a0, ptr %a1, ptr %a2) #0 {
; CHECK-NEXT: v1:0.w = vmpy(v0.h,v1.h)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v1:0 = vdeal(v1,v0,r7)
+; CHECK-NEXT: v1:0 = vshuff(v1,v0,r7)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v0.uw = vlsr(v0.uw,r3)
+; CHECK-NEXT: v3:2 = vdeal(v0,v0,r5)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: v31:30 = vdeal(v0,v1,r5)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: v3:2 = vshuff(v3,v2,r4)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: v1:0 = vshuff(v31,v30,r4)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v1.uw = vlsr(v1.uw,r3)
+; CHECK-NEXT: v1.uw = vlsr(v2.uw,r3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: v0.uw = vlsr(v0.uw,r3)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v0.h = vpacke(v1.w,v0.w)
+; CHECK-NEXT: v0.h = vpacke(v0.w,v1.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: vmem(r2+#0) = v0
More information about the llvm-commits mailing list