[llvm] 9c64f04 - [PowerPC] Legalize saturating vector add/sub

Nemanja Ivanovic via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 15 05:01:34 PST 2020


Author: Nemanja Ivanovic
Date: 2020-01-15T07:00:38-06:00
New Revision: 9c64f04df8ecbcad2c527c33f5ef8a7993842001

URL: https://github.com/llvm/llvm-project/commit/9c64f04df8ecbcad2c527c33f5ef8a7993842001
DIFF: https://github.com/llvm/llvm-project/commit/9c64f04df8ecbcad2c527c33f5ef8a7993842001.diff

LOG: [PowerPC] Legalize saturating vector add/sub

These intrinsics and the corresponding ISD nodes were recently added. PPC has
instructions that do this for vectors. Legalize them and add patterns to emit
the saturating instructions.

Differential revision: https://reviews.llvm.org/D71940

Added: 
    llvm/test/CodeGen/PowerPC/saturating-intrinsics.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCInstrAltivec.td

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 60ed72e1018b..c95e321af410 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -567,6 +567,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   }
 
   if (Subtarget.hasAltivec()) {
+    for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
+      setOperationAction(ISD::SADDSAT, VT, Legal);
+      setOperationAction(ISD::SSUBSAT, VT, Legal);
+      setOperationAction(ISD::UADDSAT, VT, Legal);
+      setOperationAction(ISD::USUBSAT, VT, Legal);
+    }
     // First set operation action for all vector types to expand. Then we
     // will selectively turn on ones that can be effectively codegen'd.
     for (MVT VT : MVT::fixedlen_vector_valuetypes()) {

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index f94816a35f79..ee4ea4487aab 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -869,6 +869,20 @@ def : Pat<(v8i16 (rotl v8i16:$vA, v8i16:$vB)),
 def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)),
           (v4i32 (VRLW v4i32:$vA, v4i32:$vB))>;
 
+// Saturating adds/subtracts.
+def : Pat<(v16i8 (saddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDSBS $vA, $vB))>;
+def : Pat<(v16i8 (uaddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDUBS $vA, $vB))>;
+def : Pat<(v8i16 (saddsat v8i16:$vA, v8i16:$vB)), (v8i16 (VADDSHS $vA, $vB))>;
+def : Pat<(v8i16 (uaddsat v8i16:$vA, v8i16:$vB)), (v8i16 (VADDUHS $vA, $vB))>;
+def : Pat<(v4i32 (saddsat v4i32:$vA, v4i32:$vB)), (v4i32 (VADDSWS $vA, $vB))>;
+def : Pat<(v4i32 (uaddsat v4i32:$vA, v4i32:$vB)), (v4i32 (VADDUWS $vA, $vB))>;
+def : Pat<(v16i8 (ssubsat v16i8:$vA, v16i8:$vB)), (v16i8 (VSUBSBS $vA, $vB))>;
+def : Pat<(v16i8 (usubsat v16i8:$vA, v16i8:$vB)), (v16i8 (VSUBUBS $vA, $vB))>;
+def : Pat<(v8i16 (ssubsat v8i16:$vA, v8i16:$vB)), (v8i16 (VSUBSHS $vA, $vB))>;
+def : Pat<(v8i16 (usubsat v8i16:$vA, v8i16:$vB)), (v8i16 (VSUBUHS $vA, $vB))>;
+def : Pat<(v4i32 (ssubsat v4i32:$vA, v4i32:$vB)), (v4i32 (VSUBSWS $vA, $vB))>;
+def : Pat<(v4i32 (usubsat v4i32:$vA, v4i32:$vB)), (v4i32 (VSUBUWS $vA, $vB))>;
+
 // Loads.
 def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
 

diff  --git a/llvm/test/CodeGen/PowerPC/saturating-intrinsics.ll b/llvm/test/CodeGen/PowerPC/saturating-intrinsics.ll
new file mode 100644
index 000000000000..05d9e2710d34
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/saturating-intrinsics.ll
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 -mtriple=powerpc64le-unknown-unknown -ppc-asm-full-reg-names \
+; RUN:   -verify-machineinstrs -mcpu=pwr7 < %s | FileCheck %s
+define dso_local <16 x i8 > @vectorsaddb(<16 x i8 > %a, <16 x i8 > %b) {
+; CHECK-LABEL: vectorsaddb:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vaddsbs v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %call = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %call
+}
+
+define dso_local <16 x i8 > @vectorssubb(<16 x i8 > %a, <16 x i8 > %b) {
+; CHECK-LABEL: vectorssubb:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsubsbs v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %call = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %call
+}
+
+define dso_local <16 x i8 > @vectoruaddb(<16 x i8 > %a, <16 x i8 > %b) {
+; CHECK-LABEL: vectoruaddb:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vaddubs v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %call = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %call
+}
+
+define dso_local <16 x i8 > @vectorusubb(<16 x i8 > %a, <16 x i8 > %b) {
+; CHECK-LABEL: vectorusubb:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsububs v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %call = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %call
+}
+
+define dso_local <8 x i16 > @vectorsaddh(<8 x i16 > %a, <8 x i16 > %b) {
+; CHECK-LABEL: vectorsaddh:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vaddshs v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %call = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %call
+}
+
+define dso_local <8 x i16 > @vectorssubh(<8 x i16 > %a, <8 x i16 > %b) {
+; CHECK-LABEL: vectorssubh:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsubshs v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %call = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %call
+}
+
+define dso_local <8 x i16 > @vectoruaddh(<8 x i16 > %a, <8 x i16 > %b) {
+; CHECK-LABEL: vectoruaddh:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vadduhs v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %call = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %call
+}
+
+define dso_local <8 x i16 > @vectorusubh(<8 x i16 > %a, <8 x i16 > %b) {
+; CHECK-LABEL: vectorusubh:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsubuhs v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %call = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %call
+}
+
+define dso_local <4 x i32 > @vectorsaddw(<4 x i32 > %a, <4 x i32 > %b) {
+; CHECK-LABEL: vectorsaddw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vaddsws v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %call = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %call
+}
+
+define dso_local <4 x i32 > @vectorssubw(<4 x i32 > %a, <4 x i32 > %b) {
+; CHECK-LABEL: vectorssubw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsubsws v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %call = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %call
+}
+
+define dso_local <4 x i32 > @vectoruaddw(<4 x i32 > %a, <4 x i32 > %b) {
+; CHECK-LABEL: vectoruaddw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vadduws v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %call = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %call
+}
+
+define dso_local <4 x i32 > @vectorusubw(<4 x i32 > %a, <4 x i32 > %b) {
+; CHECK-LABEL: vectorusubw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsubuws v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %call = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %call
+}
+
+declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)


        


More information about the llvm-commits mailing list