[llvm] r349625 - [TargetLowering] Fix propagation of undefs in zero extension ops (PR40091)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 19 05:37:59 PST 2018


Author: rksimon
Date: Wed Dec 19 05:37:59 2018
New Revision: 349625

URL: http://llvm.org/viewvc/llvm-project?rev=349625&view=rev
Log:
[TargetLowering] Fix propagation of undefs in zero extension ops (PR40091)

As described on PR40091, we have several places where zext (and zext_vector_inreg) fold an undef input into an undef output. For zero extensions this is incorrect as the output should guarantee to least have the new upper bits set to zero.

SimplifyDemandedVectorElts is the worst offender (and its the most likely to cause new undefs to appear) but DAGCombiner's tryToFoldExtendOfConstant has a similar issue.

Thanks to @dmgreen for catching this.

Differential Revision: https://reviews.llvm.org/D55883

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/trunk/test/CodeGen/AArch64/pr40091.ll
    llvm/trunk/test/CodeGen/X86/fold-vector-sext-zext.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=349625&r1=349624&r2=349625&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed Dec 19 05:37:59 2018
@@ -8064,10 +8064,15 @@ static SDValue tryToFoldExtendOfConstant
   unsigned NumElts = VT.getVectorNumElements();
   SDLoc DL(N);
 
-  for (unsigned i=0; i != NumElts; ++i) {
-    SDValue Op = N0->getOperand(i);
-    if (Op->isUndef()) {
-      Elts.push_back(DAG.getUNDEF(SVT));
+  // For zero-extensions, UNDEF elements still guarantee to have the upper
+  // bits set to zero.
+  bool IsZext =
+      Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
+
+  for (unsigned i = 0; i != NumElts; ++i) {
+    SDValue Op = N0.getOperand(i);
+    if (Op.isUndef()) {
+      Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
       continue;
     }
 

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=349625&r1=349624&r2=349625&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Wed Dec 19 05:37:59 2018
@@ -1848,6 +1848,13 @@ bool TargetLowering::SimplifyDemandedVec
       return true;
     KnownZero = SrcZero.zextOrTrunc(NumElts);
     KnownUndef = SrcUndef.zextOrTrunc(NumElts);
+
+    if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
+      // zext(undef) upper bits are guaranteed to be zero.
+      if (DemandedElts.isSubsetOf(KnownUndef))
+        return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+      KnownUndef.clearAllBits();
+    }
     break;
   }
   case ISD::OR:
@@ -1892,6 +1899,13 @@ bool TargetLowering::SimplifyDemandedVec
     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
                                    KnownZero, TLO, Depth + 1))
       return true;
+
+    if (Op.getOpcode() == ISD::ZERO_EXTEND) {
+      // zext(undef) upper bits are guaranteed to be zero.
+      if (DemandedElts.isSubsetOf(KnownUndef))
+        return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+      KnownUndef.clearAllBits();
+    }
     break;
   default: {
     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {

Modified: llvm/trunk/test/CodeGen/AArch64/pr40091.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/pr40091.ll?rev=349625&r1=349624&r2=349625&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/pr40091.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/pr40091.ll Wed Dec 19 05:37:59 2018
@@ -4,7 +4,7 @@
 define i64 @test(i64 %aa) {
 ; CHECK-LABEL: test:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v0.2d, #0xffffffffffffffff
+; CHECK-NEXT:    movi v0.8b, #137
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
 entry:

Modified: llvm/trunk/test/CodeGen/X86/fold-vector-sext-zext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fold-vector-sext-zext.ll?rev=349625&r1=349624&r2=349625&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fold-vector-sext-zext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fold-vector-sext-zext.ll Wed Dec 19 05:37:59 2018
@@ -261,12 +261,12 @@ define <4 x i64> @test_zext_4i8_4i64() {
 define <4 x i16> @test_zext_4i8_4i16_undef() {
 ; X32-LABEL: test_zext_4i8_4i16_undef:
 ; X32:       # %bb.0:
-; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <u,255,u,253>
+; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,0,253]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_zext_4i8_4i16_undef:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <u,255,u,253>
+; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,0,253]
 ; X64-NEXT:    retq
   %1 = insertelement <4 x i8> undef, i8 undef, i32 0
   %2 = insertelement <4 x i8> %1, i8 -1, i32 1
@@ -279,12 +279,12 @@ define <4 x i16> @test_zext_4i8_4i16_und
 define <4 x i32> @test_zext_4i8_4i32_undef() {
 ; X32-LABEL: test_zext_4i8_4i32_undef:
 ; X32:       # %bb.0:
-; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <0,u,2,u>
+; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,2,0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_zext_4i8_4i32_undef:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <0,u,2,u>
+; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,2,0]
 ; X64-NEXT:    retq
   %1 = insertelement <4 x i8> undef, i8 0, i32 0
   %2 = insertelement <4 x i8> %1, i8 undef, i32 1
@@ -297,12 +297,12 @@ define <4 x i32> @test_zext_4i8_4i32_und
 define <4 x i64> @test_zext_4i8_4i64_undef() {
 ; X32-LABEL: test_zext_4i8_4i64_undef:
 ; X32:       # %bb.0:
-; X32-NEXT:    vmovaps {{.*#+}} ymm0 = <u,u,255,0,2,0,u,u>
+; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,255,0,2,0,0,0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_zext_4i8_4i64_undef:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovaps {{.*#+}} ymm0 = <u,255,2,u>
+; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,2,0]
 ; X64-NEXT:    retq
   %1 = insertelement <4 x i8> undef, i8 undef, i32 0
   %2 = insertelement <4 x i8> %1, i8 -1, i32 1
@@ -359,12 +359,12 @@ define <8 x i32> @test_zext_8i8_8i32() {
 define <8 x i16> @test_zext_8i8_8i16_undef() {
 ; X32-LABEL: test_zext_8i8_8i16_undef:
 ; X32:       # %bb.0:
-; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <u,255,u,253,u,251,u,249>
+; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,0,253,0,251,0,249]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_zext_8i8_8i16_undef:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <u,255,u,253,u,251,u,249>
+; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,0,253,0,251,0,249]
 ; X64-NEXT:    retq
   %1 = insertelement <8 x i8> undef, i8 undef, i32 0
   %2 = insertelement <8 x i8> %1, i8 -1, i32 1
@@ -381,12 +381,12 @@ define <8 x i16> @test_zext_8i8_8i16_und
 define <8 x i32> @test_zext_8i8_8i32_undef() {
 ; X32-LABEL: test_zext_8i8_8i32_undef:
 ; X32:       # %bb.0:
-; X32-NEXT:    vmovaps {{.*#+}} ymm0 = <0,u,2,253,4,u,6,u>
+; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,2,253,4,0,6,0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_zext_8i8_8i32_undef:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovaps {{.*#+}} ymm0 = <0,u,2,253,4,u,6,u>
+; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,2,253,4,0,6,0]
 ; X64-NEXT:    retq
   %1 = insertelement <8 x i8> undef, i8 0, i32 0
   %2 = insertelement <8 x i8> %1, i8 undef, i32 1




More information about the llvm-commits mailing list