[llvm] f8cb97a - [Hexagon] Fix deinterleaving after vmpyh

Krzysztof Parzyszek via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 23 13:32:12 PST 2022


Author: Krzysztof Parzyszek
Date: 2022-11-23T13:31:54-08:00
New Revision: f8cb97a0f1a4fed7b0347f127ffe33841cf02134

URL: https://github.com/llvm/llvm-project/commit/f8cb97a0f1a4fed7b0347f127ffe33841cf02134
DIFF: https://github.com/llvm/llvm-project/commit/f8cb97a0f1a4fed7b0347f127ffe33841cf02134.diff

LOG: [Hexagon] Fix deinterleaving after vmpyh

Added: 
    

Modified: 
    llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
    llvm/test/CodeGen/Hexagon/autohvx/mulh.ll
    llvm/test/CodeGen/Hexagon/autohvx/qmul.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index 144555ed55e3..fe9fd14d0f0b 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -1586,7 +1586,7 @@ auto HvxIdioms::createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const
   Value *P =
       HVC.createHvxIntrinsic(Builder, V6_vmpyh, HvxP32Ty, {Y.Val, X.Val});
   // Deinterleave
-  return HVC.vdeal(Builder, HVC.sublo(Builder, P), HVC.subhi(Builder, P));
+  return HVC.vshuff(Builder, HVC.sublo(Builder, P), HVC.subhi(Builder, P));
 }
 
 auto HvxIdioms::createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const

diff  --git a/llvm/test/CodeGen/Hexagon/autohvx/mulh.ll b/llvm/test/CodeGen/Hexagon/autohvx/mulh.ll
index a4a418f0d94e..5009adf8e239 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/mulh.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/mulh.ll
@@ -10,13 +10,28 @@ define <64 x i16> @mulhs16(<64 x i16> %a0, <64 x i16> %a1) #0 {
 ; V60-NEXT:     v1:0.w = vmpy(v1.h,v0.h)
 ; V60-NEXT:    }
 ; V60-NEXT:    {
-; V60-NEXT:     r7 = #-4
+; V60-NEXT:     r7:6 = combine(#64,#68)
 ; V60-NEXT:    }
 ; V60-NEXT:    {
-; V60-NEXT:     v1:0 = vdeal(v1,v0,r7)
+; V60-NEXT:     r5 = #120
 ; V60-NEXT:    }
 ; V60-NEXT:    {
-; V60-NEXT:     v0.h = vpacko(v1.w,v0.w)
+; V60-NEXT:     v1:0 = vshuff(v1,v0,r7)
+; V60-NEXT:    }
+; V60-NEXT:    {
+; V60-NEXT:     v3:2 = vdeal(v0,v0,r6)
+; V60-NEXT:    }
+; V60-NEXT:    {
+; V60-NEXT:     v31:30 = vdeal(v0,v1,r6)
+; V60-NEXT:    }
+; V60-NEXT:    {
+; V60-NEXT:     v3:2 = vshuff(v3,v2,r5)
+; V60-NEXT:    }
+; V60-NEXT:    {
+; V60-NEXT:     v1:0 = vshuff(v31,v30,r5)
+; V60-NEXT:    }
+; V60-NEXT:    {
+; V60-NEXT:     v0.h = vpacko(v0.w,v2.w)
 ; V60-NEXT:    }
 ; V60-NEXT:    {
 ; V60-NEXT:     jumpr r31
@@ -28,13 +43,28 @@ define <64 x i16> @mulhs16(<64 x i16> %a0, <64 x i16> %a1) #0 {
 ; V65-NEXT:     v1:0.w = vmpy(v1.h,v0.h)
 ; V65-NEXT:    }
 ; V65-NEXT:    {
-; V65-NEXT:     r7 = #-4
+; V65-NEXT:     r7:6 = combine(#64,#68)
 ; V65-NEXT:    }
 ; V65-NEXT:    {
-; V65-NEXT:     v1:0 = vdeal(v1,v0,r7)
+; V65-NEXT:     r5 = #120
 ; V65-NEXT:    }
 ; V65-NEXT:    {
-; V65-NEXT:     v0.h = vpacko(v1.w,v0.w)
+; V65-NEXT:     v1:0 = vshuff(v1,v0,r7)
+; V65-NEXT:    }
+; V65-NEXT:    {
+; V65-NEXT:     v3:2 = vdeal(v0,v0,r6)
+; V65-NEXT:    }
+; V65-NEXT:    {
+; V65-NEXT:     v31:30 = vdeal(v0,v1,r6)
+; V65-NEXT:    }
+; V65-NEXT:    {
+; V65-NEXT:     v3:2 = vshuff(v3,v2,r5)
+; V65-NEXT:    }
+; V65-NEXT:    {
+; V65-NEXT:     v1:0 = vshuff(v31,v30,r5)
+; V65-NEXT:    }
+; V65-NEXT:    {
+; V65-NEXT:     v0.h = vpacko(v0.w,v2.w)
 ; V65-NEXT:    }
 ; V65-NEXT:    {
 ; V65-NEXT:     jumpr r31
@@ -46,13 +76,28 @@ define <64 x i16> @mulhs16(<64 x i16> %a0, <64 x i16> %a1) #0 {
 ; V69-NEXT:     v1:0.w = vmpy(v1.h,v0.h)
 ; V69-NEXT:    }
 ; V69-NEXT:    {
-; V69-NEXT:     r7 = #-4
+; V69-NEXT:     r7:6 = combine(#64,#68)
+; V69-NEXT:    }
+; V69-NEXT:    {
+; V69-NEXT:     r5 = #120
+; V69-NEXT:    }
+; V69-NEXT:    {
+; V69-NEXT:     v1:0 = vshuff(v1,v0,r7)
+; V69-NEXT:    }
+; V69-NEXT:    {
+; V69-NEXT:     v3:2 = vdeal(v0,v0,r6)
+; V69-NEXT:    }
+; V69-NEXT:    {
+; V69-NEXT:     v31:30 = vdeal(v0,v1,r6)
 ; V69-NEXT:    }
 ; V69-NEXT:    {
-; V69-NEXT:     v1:0 = vdeal(v1,v0,r7)
+; V69-NEXT:     v3:2 = vshuff(v3,v2,r5)
 ; V69-NEXT:    }
 ; V69-NEXT:    {
-; V69-NEXT:     v0.h = vpacko(v1.w,v0.w)
+; V69-NEXT:     v1:0 = vshuff(v31,v30,r5)
+; V69-NEXT:    }
+; V69-NEXT:    {
+; V69-NEXT:     v0.h = vpacko(v0.w,v2.w)
 ; V69-NEXT:    }
 ; V69-NEXT:    {
 ; V69-NEXT:     jumpr r31
@@ -72,13 +117,28 @@ define <64 x i16> @mulhu16(<64 x i16> %a0, <64 x i16> %a1) #0 {
 ; V60-NEXT:     v1:0.uw = vmpy(v1.uh,v0.uh)
 ; V60-NEXT:    }
 ; V60-NEXT:    {
-; V60-NEXT:     r7 = #-4
+; V60-NEXT:     r7:6 = combine(#64,#68)
+; V60-NEXT:    }
+; V60-NEXT:    {
+; V60-NEXT:     r5 = #120
+; V60-NEXT:    }
+; V60-NEXT:    {
+; V60-NEXT:     v1:0 = vshuff(v1,v0,r7)
+; V60-NEXT:    }
+; V60-NEXT:    {
+; V60-NEXT:     v3:2 = vdeal(v0,v0,r6)
+; V60-NEXT:    }
+; V60-NEXT:    {
+; V60-NEXT:     v31:30 = vdeal(v0,v1,r6)
 ; V60-NEXT:    }
 ; V60-NEXT:    {
-; V60-NEXT:     v1:0 = vdeal(v1,v0,r7)
+; V60-NEXT:     v3:2 = vshuff(v3,v2,r5)
 ; V60-NEXT:    }
 ; V60-NEXT:    {
-; V60-NEXT:     v0.h = vpacko(v1.w,v0.w)
+; V60-NEXT:     v1:0 = vshuff(v31,v30,r5)
+; V60-NEXT:    }
+; V60-NEXT:    {
+; V60-NEXT:     v0.h = vpacko(v0.w,v2.w)
 ; V60-NEXT:    }
 ; V60-NEXT:    {
 ; V60-NEXT:     jumpr r31
@@ -90,13 +150,28 @@ define <64 x i16> @mulhu16(<64 x i16> %a0, <64 x i16> %a1) #0 {
 ; V65-NEXT:     v1:0.uw = vmpy(v1.uh,v0.uh)
 ; V65-NEXT:    }
 ; V65-NEXT:    {
-; V65-NEXT:     r7 = #-4
+; V65-NEXT:     r7:6 = combine(#64,#68)
+; V65-NEXT:    }
+; V65-NEXT:    {
+; V65-NEXT:     r5 = #120
+; V65-NEXT:    }
+; V65-NEXT:    {
+; V65-NEXT:     v1:0 = vshuff(v1,v0,r7)
+; V65-NEXT:    }
+; V65-NEXT:    {
+; V65-NEXT:     v3:2 = vdeal(v0,v0,r6)
+; V65-NEXT:    }
+; V65-NEXT:    {
+; V65-NEXT:     v31:30 = vdeal(v0,v1,r6)
+; V65-NEXT:    }
+; V65-NEXT:    {
+; V65-NEXT:     v3:2 = vshuff(v3,v2,r5)
 ; V65-NEXT:    }
 ; V65-NEXT:    {
-; V65-NEXT:     v1:0 = vdeal(v1,v0,r7)
+; V65-NEXT:     v1:0 = vshuff(v31,v30,r5)
 ; V65-NEXT:    }
 ; V65-NEXT:    {
-; V65-NEXT:     v0.h = vpacko(v1.w,v0.w)
+; V65-NEXT:     v0.h = vpacko(v0.w,v2.w)
 ; V65-NEXT:    }
 ; V65-NEXT:    {
 ; V65-NEXT:     jumpr r31

diff  --git a/llvm/test/CodeGen/Hexagon/autohvx/qmul.ll b/llvm/test/CodeGen/Hexagon/autohvx/qmul.ll
index 6760875ec0df..13634ed445ac 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/qmul.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/qmul.ll
@@ -75,7 +75,10 @@ define void @f2(ptr %a0, ptr %a1, ptr %a2) #0 {
 ; CHECK-NEXT:     v0 = vmem(r1+#0)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     r7 = #-4
+; CHECK-NEXT:     r7 = #64
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r5:4 = combine(#68,#120)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
 ; CHECK-NEXT:     r3 = #15
@@ -87,16 +90,28 @@ define void @f2(ptr %a0, ptr %a1, ptr %a2) #0 {
 ; CHECK-NEXT:     v1:0.w = vmpy(v0.h,v1.h)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     v1:0 = vdeal(v1,v0,r7)
+; CHECK-NEXT:     v1:0 = vshuff(v1,v0,r7)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     v0.uw = vlsr(v0.uw,r3)
+; CHECK-NEXT:     v3:2 = vdeal(v0,v0,r5)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v31:30 = vdeal(v0,v1,r5)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v3:2 = vshuff(v3,v2,r4)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v1:0 = vshuff(v31,v30,r4)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     v1.uw = vlsr(v1.uw,r3)
+; CHECK-NEXT:     v1.uw = vlsr(v2.uw,r3)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0.uw = vlsr(v0.uw,r3)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     v0.h = vpacke(v1.w,v0.w)
+; CHECK-NEXT:     v0.h = vpacke(v0.w,v1.w)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
 ; CHECK-NEXT:     vmem(r2+#0) = v0


        


More information about the llvm-commits mailing list