[llvm] 739cd26 - [PowerPC] Exploit the High Order Vector Multiply Instructions on Power10

Fri Jul 24 19:41:34 PDT 2020

Author: Amy Kwan
Date: 2020-07-24T20:57:57-05:00
New Revision: 739cd2638b12c37d6bf867d68cd86d18bc5e2e42

URL: https://github.com/llvm/llvm-project/commit/739cd2638b12c37d6bf867d68cd86d18bc5e2e42
DIFF: https://github.com/llvm/llvm-project/commit/739cd2638b12c37d6bf867d68cd86d18bc5e2e42.diff

LOG: [PowerPC] Exploit the High Order Vector Multiply Instructions on Power10

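This patch exploits the following vector multiply-high instructions on Power10 (ISA 3.1) by marking ISD::MULHS and ISD::MULHU as legal for v4i32 and v2i64 and adding instruction selection patterns for them: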
vmulhsw VRT, VRA, VRB
vmulhsd VRT, VRA, VRB
vmulhuw VRT, VRA, VRB
vmulhud VRT, VRA, VRB

Differential Revision: https://reviews.llvm.org/D82584

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCInstrPrefix.td
    llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index c2ba7195509a..8c28ead9f604 100644

--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -811,6 +811,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
 
     if (Subtarget.isISA3_1()) {
       setOperationAction(ISD::MUL, MVT::v2i64, Legal);
+      setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
+      setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
+      setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
+      setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
       setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
       setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
       setOperationAction(ISD::UDIV, MVT::v4i32, Legal);

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index 4e048ee9930e..22839e697381 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -976,13 +976,17 @@ let Predicates = [IsISA3_1] in {
                         "vmulld $vD, $vA, $vB", IIC_VecGeneral,
                         [(set v2i64:$vD, (mul v2i64:$vA, v2i64:$vB))]>;
   def VMULHSW : VXForm_1<905, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                         "vmulhsw $vD, $vA, $vB", IIC_VecGeneral, []>;
+                         "vmulhsw $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v4i32:$vD, (mulhs v4i32:$vA, v4i32:$vB))]>;
   def VMULHUW : VXForm_1<649, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                         "vmulhuw $vD, $vA, $vB", IIC_VecGeneral, []>;
+                         "vmulhuw $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v4i32:$vD, (mulhu v4i32:$vA, v4i32:$vB))]>;
   def VMULHSD : VXForm_1<969, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                         "vmulhsd $vD, $vA, $vB", IIC_VecGeneral, []>;
+                         "vmulhsd $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v2i64:$vD, (mulhs v2i64:$vA, v2i64:$vB))]>;
   def VMULHUD : VXForm_1<713, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                         "vmulhud $vD, $vA, $vB", IIC_VecGeneral, []>;
+                         "vmulhud $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v2i64:$vD, (mulhu v2i64:$vA, v2i64:$vB))]>;
   def VMODSW : VXForm_1<1931, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                         "vmodsw $vD, $vA, $vB", IIC_VecGeneral,
                         [(set v4i32:$vD, (srem v4i32:$vA, v4i32:$vB))]>;

diff  --git a/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll b/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
index e8f77574f66c..75c6d8c24038 100644
--- a/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
@@ -7,6 +7,9 @@
 ; RUN:   FileCheck %s
 
 ; This test case aims to test the vector multiply instructions on Power10.
+; This includes the low order and high order versions of vector multiply.
+; The low order version operates on doublewords, whereas the high order version
+; operates on signed and unsigned words and doublewords.
 
 define <2 x i64> @test_vmulld(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vmulld:
@@ -17,3 +20,59 @@ entry:
   %mul = mul <2 x i64> %b, %a
   ret <2 x i64> %mul
 }
+
+define <2 x i64> @test_vmulhsd(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vmulhsd:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmulhsd v2, v3, v2
+; CHECK-NEXT:    blr
+entry:
+  %0 = sext <2 x i64> %a to <2 x i128>
+  %1 = sext <2 x i64> %b to <2 x i128>
+  %mul = mul <2 x i128> %1, %0
+  %shr = lshr <2 x i128> %mul, <i128 64, i128 64>
+  %tr = trunc <2 x i128> %shr to <2 x i64>
+  ret <2 x i64> %tr
+}
+
+define <2 x i64> @test_vmulhud(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vmulhud:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmulhud v2, v3, v2
+; CHECK-NEXT:    blr
+entry:
+  %0 = zext <2 x i64> %a to <2 x i128>
+  %1 = zext <2 x i64> %b to <2 x i128>
+  %mul = mul <2 x i128> %1, %0
+  %shr = lshr <2 x i128> %mul, <i128 64, i128 64>
+  %tr = trunc <2 x i128> %shr to <2 x i64>
+  ret <2 x i64> %tr
+}
+
+define <4 x i32> @test_vmulhsw(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmulhsw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmulhsw v2, v3, v2
+; CHECK-NEXT:    blr
+entry:
+  %0 = sext <4 x i32> %a to <4 x i64>
+  %1 = sext <4 x i32> %b to <4 x i64>
+  %mul = mul <4 x i64> %1, %0
+  %shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32>
+  %tr = trunc <4 x i64> %shr to <4 x i32>
+  ret <4 x i32> %tr
+}
+
+define <4 x i32> @test_vmulhuw(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmulhuw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmulhuw v2, v3, v2
+; CHECK-NEXT:    blr
+entry:
+  %0 = zext <4 x i32> %a to <4 x i64>
+  %1 = zext <4 x i32> %b to <4 x i64>
+  %mul = mul <4 x i64> %1, %0
+  %shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32>
+  %tr = trunc <4 x i64> %shr to <4 x i32>
+  ret <4 x i32> %tr
+}