[PATCH] D145353: [PowerPC] Remove the side effect of saturating instructions in some cases

ChenZheng via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Sun Mar 5 22:29:05 PST 2023


shchenz created this revision.
shchenz added reviewers: nemanjai, PowerPC.
shchenz added a project: LLVM.
Herald added subscribers: kbarton, hiraditya.
Herald added a project: All.
shchenz requested review of this revision.
Herald added a subscriber: llvm-commits.

Fixes https://github.com/llvm/llvm-project/issues/60684


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D145353

Files:
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/test/CodeGen/PowerPC/vector-sum-sat-bit-side-effect.ll


Index: llvm/test/CodeGen/PowerPC/vector-sum-sat-bit-side-effect.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/vector-sum-sat-bit-side-effect.ll
+++ llvm/test/CodeGen/PowerPC/vector-sum-sat-bit-side-effect.ll
@@ -9,8 +9,6 @@
 define void @test1(<16 x i8> %0) {
 ; CHECK-LABEL: test1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxlxor v3, v3, v3
-; CHECK-NEXT:    vsum4sbs v2, v2, v3
 ; CHECK-NEXT:    blr
 entry:
   %1 = tail call <4 x i32> @llvm.ppc.altivec.vsum4sbs(<16 x i8> %0, <4 x i32> zeroinitializer)
@@ -20,8 +18,6 @@
 define void @test2(<8 x i16> %0) {
 ; CHECK-LABEL: test2:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxlxor v3, v3, v3
-; CHECK-NEXT:    vsum4shs v2, v2, v3
 ; CHECK-NEXT:    blr
 entry:
   %1 = tail call <4 x i32> @llvm.ppc.altivec.vsum4shs(<8 x i16> %0, <4 x i32> zeroinitializer)
@@ -31,8 +27,6 @@
 define void @test3(<16 x i8> %0) {
 ; CHECK-LABEL: test3:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxlxor v3, v3, v3
-; CHECK-NEXT:    vsum4ubs v2, v2, v3
 ; CHECK-NEXT:    blr
 entry:
   %1 = tail call <4 x i32> @llvm.ppc.altivec.vsum4ubs(<16 x i8> %0, <4 x i32> zeroinitializer)
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -15786,19 +15786,39 @@
     }
 
     break;
-  case ISD::INTRINSIC_W_CHAIN:
-    // For little endian, VSX loads require generating lxvd2x/xxswapd.
-    // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
-    if (Subtarget.needsSwapsForVSXMemOps()) {
-      switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
-      default:
-        break;
-      case Intrinsic::ppc_vsx_lxvw4x:
-      case Intrinsic::ppc_vsx_lxvd2x:
-        return expandVSXLoadForLE(N, DCI);
+    case ISD::INTRINSIC_W_CHAIN: {
+      unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+      if (IID == Intrinsic::ppc_altivec_vsum4sbs ||
+          IID == Intrinsic::ppc_altivec_vsum4shs ||
+          IID == Intrinsic::ppc_altivec_vsum4ubs) {
+        if (BuildVectorSDNode *BVN =
+                dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
+          APInt APSplatBits, APSplatUndef;
+          unsigned SplatBitSize;
+          bool HasAnyUndefs;
+          bool BVNIsConstantSplat = BVN->isConstantSplat(
+              APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
+              !Subtarget.isLittleEndian());
+          // If the constant splat vector is 0, SAT bit will not be changed.
+          // The intrinsic should not have any side effect for this case.
+          if (BVNIsConstantSplat && APSplatBits == 0)
+            DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
+        }
+        return SDValue();
       }
+      // For little endian, VSX loads require generating lxvd2x/xxswapd.
+      // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
+      if (Subtarget.needsSwapsForVSXMemOps()) {
+        switch (IID) {
+        default:
+          break;
+        case Intrinsic::ppc_vsx_lxvw4x:
+        case Intrinsic::ppc_vsx_lxvd2x:
+          return expandVSXLoadForLE(N, DCI);
+        }
+      }
+      break;
     }
-    break;
   case ISD::INTRINSIC_VOID:
     // For little endian, VSX stores require generating xxswapd/stxvd2x.
     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D145353.502520.patch
Type: text/x-patch
Size: 3529 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230306/8d0d761b/attachment.bin>


More information about the llvm-commits mailing list