[llvm] 72c4cbd - [PowerPC] Fix multi-use case for swap reduction

Thu Mar 11 06:01:51 PST 2021

Author: Qiu Chaofan
Date: 2021-03-11T21:58:33+08:00
New Revision: 72c4cbd60e53bda37fffa67528cd57564d37a775

URL: https://github.com/llvm/llvm-project/commit/72c4cbd60e53bda37fffa67528cd57564d37a775
DIFF: https://github.com/llvm/llvm-project/commit/72c4cbd60e53bda37fffa67528cd57564d37a775.diff

LOG: [PowerPC] Fix multi-use case for swap reduction

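4c973ae implemented reduction of vector swaps for lane-insensitive
operations. This commit fixes that transformation by checking the number
of uses of the vector operation: the swap reduction is now applied only
when the vector operation has a single use.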

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
    llvm/test/CodeGen/PowerPC/swap-reduction.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 2f341e43de45..d42cc364e974 100644

--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -6803,7 +6803,7 @@ static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
   };
 
   SDValue VecOp = SkipRCCopy(N->getOperand(0));
-  if (!isLaneInsensitive(VecOp))
+  if (!isLaneInsensitive(VecOp) || !VecOp.hasOneUse())
     return;
 
   SDValue LHS = SkipRCCopy(VecOp.getOperand(0)),

diff  --git a/llvm/test/CodeGen/PowerPC/swap-reduction.ll b/llvm/test/CodeGen/PowerPC/swap-reduction.ll
index 228f89897518..eb1f5b728b03 100644
--- a/llvm/test/CodeGen/PowerPC/swap-reduction.ll
+++ b/llvm/test/CodeGen/PowerPC/swap-reduction.ll
@@ -55,4 +55,33 @@ entry:
   ret i64 %sum
 }
 
+; Ensure that vec-ops with multiple uses aren't simplified.
+define signext i16 @vecop_uses(i16* %addr) {
+; CHECK-LABEL: vecop_uses:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li 4, 16
+; CHECK-NEXT:    lxvd2x 1, 0, 3
+; CHECK-NEXT:    lxvd2x 0, 3, 4
+; CHECK-NEXT:    xxswapd 35, 1
+; CHECK-NEXT:    xxswapd 34, 0
+; CHECK-NEXT:    vminsh 2, 3, 2
+; CHECK-NEXT:    xxswapd 35, 34
+; CHECK-NEXT:    vminsh 2, 2, 3
+; CHECK-NEXT:    xxspltw 35, 34, 2
+; CHECK-NEXT:    vminsh 2, 2, 3
+; CHECK-NEXT:    vsplth 3, 2, 6
+; CHECK-NEXT:    vminsh 2, 2, 3
+; CHECK-NEXT:    xxswapd 0, 34
+; CHECK-NEXT:    mffprd 3, 0
+; CHECK-NEXT:    clrldi 3, 3, 48
+; CHECK-NEXT:    extsh 3, 3
+; CHECK-NEXT:    blr
+entry:
+  %0 = bitcast i16* %addr to <16 x i16>*
+  %1 = load <16 x i16>, <16 x i16>* %0, align 2
+  %2 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %1)
+  ret i16 %2
+}
+
 declare <16 x i8> @llvm.ppc.altivec.vavgsb(<16 x i8>, <16 x i8>)
+declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>)