[llvm] 31ecef7 - [SystemZ] Don't create PERMUTE nodes with an undef operand.

Jonas Paulsson via llvm-commits llvm-commits at lists.llvm.org
Mon May 18 10:46:38 PDT 2020


Author: Jonas Paulsson
Date: 2020-05-18T19:42:14+02:00
New Revision: 31ecef76275158c87d63772a70fbc282d025e7ab

URL: https://github.com/llvm/llvm-project/commit/31ecef76275158c87d63772a70fbc282d025e7ab
DIFF: https://github.com/llvm/llvm-project/commit/31ecef76275158c87d63772a70fbc282d025e7ab.diff

LOG: [SystemZ] Don't create PERMUTE nodes with an undef operand.

It's better to reuse the first source value than to use an undef second
operand, because more of the resulting VPERMs will then have identical
operands, which makes MachineCSE more successful.

Review: Ulrich Weigand

Added: 
    llvm/test/CodeGen/SystemZ/vec-perm-14.ll

Modified: 
    llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
    llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 0ce6f3177224..7a8b5249255f 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -4474,7 +4474,8 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
     else
       IndexNodes[I] = DAG.getUNDEF(MVT::i32);
   SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
-  return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
+  return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
+                     (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
 }
 
 namespace {

diff  --git a/llvm/test/CodeGen/SystemZ/vec-perm-14.ll b/llvm/test/CodeGen/SystemZ/vec-perm-14.ll
new file mode 100644
index 000000000000..0cf3c6ef7a06
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-perm-14.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+;
+; Test that only one vperm of the vector compare is needed for both extracts.
+
+define void @fun() {
+; CHECK-LABEL: fun
+; CHECK: vperm
+; CHECK-NOT: vperm
+bb:
+  %tmp = load <4 x i8>, <4 x i8>* undef
+  %tmp1 = icmp eq <4 x i8> zeroinitializer, %tmp
+  %tmp2 = extractelement <4 x i1> %tmp1, i32 0
+  br i1 %tmp2, label %bb1, label %bb2
+
+bb1:
+  unreachable
+
+bb2:
+  %tmp3 = extractelement <4 x i1> %tmp1, i32 1
+  br i1 %tmp3, label %bb3, label %bb4
+
+bb3:
+  unreachable
+
+bb4:
+  unreachable
+}

diff  --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
index 7cea2ff8eb9c..b7cbac89db31 100644
--- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
@@ -5377,12 +5377,12 @@ define void @constrained_vector_fptrunc_v3f64(<3 x double>* %src, <3 x float>* %
 ; SZ13-LABEL: constrained_vector_fptrunc_v3f64:
 ; SZ13:       # %bb.0: # %entry
 ; SZ13-NEXT:    vl %v1, 0(%r2), 4
+; SZ13-NEXT:    ld %f0, 16(%r2)
 ; SZ13-NEXT:    vledb %v1, %v1, 0, 0
 ; SZ13-NEXT:    larl %r1, .LCPI97_0
-; SZ13-NEXT:    ld %f0, 16(%r2)
-; SZ13-NEXT:    vl %v2, 0(%r1), 3
-; SZ13-NEXT:    vperm %v1, %v1, %v0, %v2
 ; SZ13-NEXT:    ledbra %f0, 0, %f0, 0
+; SZ13-NEXT:    vl %v2, 0(%r1), 3
+; SZ13-NEXT:    vperm %v1, %v1, %v1, %v2
 ; SZ13-NEXT:    ste %f0, 8(%r3)
 ; SZ13-NEXT:    vsteg %v1, 0(%r3), 0
 ; SZ13-NEXT:    br %r14


        


More information about the llvm-commits mailing list