[llvm] r292627 - [InstCombine][X86] Add MULDQ/MULUDQ undef handling

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 20 10:20:31 PST 2017


Author: rksimon
Date: Fri Jan 20 12:20:30 2017
New Revision: 292627

URL: http://llvm.org/viewvc/llvm-project?rev=292627&view=rev
Log:
[InstCombine][X86] Add MULDQ/MULUDQ undef handling

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
    llvm/trunk/test/Transforms/InstCombine/x86-muldq.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=292627&r1=292626&r2=292627&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Fri Jan 20 12:20:30 2017
@@ -510,6 +510,18 @@ static Value *simplifyX86varShift(const
   return Builder.CreateAShr(Vec, ShiftVec);
 }
 
+static Value *simplifyX86muldq(const IntrinsicInst &II) {
+  Value *Arg0 = II.getArgOperand(0);
+  Value *Arg1 = II.getArgOperand(1);
+  Type *ResTy = II.getType();
+  // Fold only when both operands are entirely undef; otherwise return nullptr.
+  // muldq/muludq(undef, undef) -> undef
+  if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
+    return UndefValue::get(ResTy);
+
+  return nullptr;
+}
+
 static Value *simplifyX86movmsk(const IntrinsicInst &II,
                                 InstCombiner::BuilderTy &Builder) {
   Value *Arg = II.getArgOperand(0);
@@ -2142,6 +2154,9 @@ Instruction *InstCombiner::visitCallInst
   case Intrinsic::x86_avx2_pmulu_dq:
   case Intrinsic::x86_avx512_pmul_dq_512:
   case Intrinsic::x86_avx512_pmulu_dq_512: {
+    if (Value *V = simplifyX86muldq(*II))
+      return replaceInstUsesWith(*II, V);
+
     unsigned VWidth = II->getType()->getVectorNumElements();
     APInt UndefElts(VWidth, 0);
     APInt DemandedElts = APInt::getAllOnesValue(VWidth);

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp?rev=292627&r1=292626&r2=292627&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp Fri Jan 20 12:20:30 2017
@@ -1469,6 +1469,12 @@ Value *InstCombiner::SimplifyDemandedVec
                                         Depth + 1);
       if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
 
+      // Output elements are undef only if both input elements are undef.
+      // Consider things like undef*0: the result is known zero, not undef.
+      for (unsigned i = 0; i != VWidth; ++i)
+        if (UndefElts2[i * 2] && UndefElts3[i * 2])
+          UndefElts.setBit(i);
+
       break;
     }
 

Modified: llvm/trunk/test/Transforms/InstCombine/x86-muldq.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-muldq.ll?rev=292627&r1=292626&r2=292627&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-muldq.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-muldq.ll Fri Jan 20 12:20:30 2017
@@ -7,8 +7,7 @@
 
 define <2 x i64> @undef_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: @undef_pmuludq_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+; CHECK-NEXT:    ret <2 x i64> undef
 ;
   %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> undef)
   ret <2 x i64> %1
@@ -16,8 +15,7 @@ define <2 x i64> @undef_pmuludq_128(<4 x
 
 define <4 x i64> @undef_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
 ; CHECK-LABEL: @undef_pmuludq_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+; CHECK-NEXT:    ret <4 x i64> undef
 ;
   %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> undef, <8 x i32> undef)
   ret <4 x i64> %1
@@ -25,8 +23,7 @@ define <4 x i64> @undef_pmuludq_256(<8 x
 
 define <8 x i64> @undef_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
 ; CHECK-LABEL: @undef_pmuludq_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> undef)
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+; CHECK-NEXT:    ret <8 x i64> undef
 ;
   %1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> undef)
   ret <8 x i64> %1
@@ -34,8 +31,7 @@ define <8 x i64> @undef_pmuludq_512(<16
 
 define <2 x i64> @undef_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: @undef_pmuldq_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+; CHECK-NEXT:    ret <2 x i64> undef
 ;
   %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> undef, <4 x i32> undef)
   ret <2 x i64> %1
@@ -43,8 +39,7 @@ define <2 x i64> @undef_pmuldq_128(<4 x
 
 define <4 x i64> @undef_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
 ; CHECK-LABEL: @undef_pmuldq_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+; CHECK-NEXT:    ret <4 x i64> undef
 ;
   %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> undef)
   ret <4 x i64> %1
@@ -52,13 +47,66 @@ define <4 x i64> @undef_pmuldq_256(<8 x
 
 define <8 x i64> @undef_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
 ; CHECK-LABEL: @undef_pmuldq_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> undef, <16 x i32> undef)
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+; CHECK-NEXT:    ret <8 x i64> undef
 ;
   %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> undef, <16 x i32> undef)
   ret <8 x i64> %1
 }
 
+define <2 x i64> @undef_zero_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @undef_zero_pmuludq_128(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 undef>)
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> zeroinitializer)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @undef_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @undef_zero_pmuludq_256(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> <i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef>, <8 x i32> undef)
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> zeroinitializer, <8 x i32> undef)
+  ret <4 x i64> %1
+}
+
+define <8 x i64> @undef_zero_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @undef_zero_pmuludq_512(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> <i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef>)
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> zeroinitializer)
+  ret <8 x i64> %1
+}
+
+define <2 x i64> @undef_zero_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @undef_zero_pmuldq_128(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> <i32 0, i32 undef, i32 0, i32 undef>, <4 x i32> undef)
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> zeroinitializer, <4 x i32> undef)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @undef_zero_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @undef_zero_pmuldq_256(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef>)
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> zeroinitializer)
+  ret <4 x i64> %1
+}
+
+define <8 x i64> @undef_zero_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @undef_zero_pmuldq_512(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> <i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef>, <16 x i32> undef)
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> zeroinitializer, <16 x i32> undef)
+  ret <8 x i64> %1
+}
+
 ;
 ; PMULUDQ/PMULDQ - only the even elements (0, 2, 4, 6) of the vXi32 inputs are required.
 ;




More information about the llvm-commits mailing list