[llvm] r332202 - [X86] Extend instcombine folds for pclmulqdq intrinsics to the 256 and 512 bit versions.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sun May 13 14:56:32 PDT 2018


Author: ctopper
Date: Sun May 13 14:56:32 2018
New Revision: 332202

URL: http://llvm.org/viewvc/llvm-project?rev=332202&view=rev
Log:
[X86] Extend instcombine folds for pclmulqdq intrinsics to the 256 and 512 bit versions.

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/trunk/test/Transforms/InstCombine/X86/clmulqdq.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=332202&r1=332201&r2=332202&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Sun May 13 14:56:32 2018
@@ -2553,7 +2553,9 @@ Instruction *InstCombiner::visitCallInst
       return replaceInstUsesWith(*II, V);
     break;
 
-  case Intrinsic::x86_pclmulqdq: {
+  case Intrinsic::x86_pclmulqdq:
+  case Intrinsic::x86_pclmulqdq_256:
+  case Intrinsic::x86_pclmulqdq_512: {
     if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
       unsigned Imm = C->getZExtValue();
 
@@ -2561,27 +2563,28 @@ Instruction *InstCombiner::visitCallInst
       Value *Arg0 = II->getArgOperand(0);
       Value *Arg1 = II->getArgOperand(1);
       unsigned VWidth = Arg0->getType()->getVectorNumElements();
-      APInt DemandedElts(VWidth, 0);
 
       APInt UndefElts1(VWidth, 0);
-      DemandedElts = (Imm & 0x01) ? 2 : 1;
-      if (Value *V = SimplifyDemandedVectorElts(Arg0, DemandedElts,
+      APInt DemandedElts1 = APInt::getSplat(VWidth,
+                                            APInt(2, (Imm & 0x01) ? 2 : 1));
+      if (Value *V = SimplifyDemandedVectorElts(Arg0, DemandedElts1,
                                                 UndefElts1)) {
         II->setArgOperand(0, V);
         MadeChange = true;
       }
 
       APInt UndefElts2(VWidth, 0);
-      DemandedElts = (Imm & 0x10) ? 2 : 1;
-      if (Value *V = SimplifyDemandedVectorElts(Arg1, DemandedElts,
+      APInt DemandedElts2 = APInt::getSplat(VWidth,
+                                            APInt(2, (Imm & 0x10) ? 2 : 1));
+      if (Value *V = SimplifyDemandedVectorElts(Arg1, DemandedElts2,
                                                 UndefElts2)) {
         II->setArgOperand(1, V);
         MadeChange = true;
       }
 
-      // If both input elements are undef, the result is undef.
-      if (UndefElts1[(Imm & 0x01) ? 1 : 0] ||
-          UndefElts2[(Imm & 0x10) ? 1 : 0])
+      // If all demanded elements of either input are undef, the result is zero.
+      if (DemandedElts1.isSubsetOf(UndefElts1) ||
+          DemandedElts2.isSubsetOf(UndefElts2))
         return replaceInstUsesWith(*II,
                                    ConstantAggregateZero::get(II->getType()));
 

Modified: llvm/trunk/test/Transforms/InstCombine/X86/clmulqdq.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/clmulqdq.ll?rev=332202&r1=332201&r2=332202&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/X86/clmulqdq.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/X86/clmulqdq.ll Sun May 13 14:56:32 2018
@@ -2,6 +2,8 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8)
+declare <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64>, <4 x i64>, i8)
+declare <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64>, <8 x i64>, i8)
 
 define <2 x i64> @test_demanded_elts_pclmulqdq_0(<2 x i64> %a0, <2 x i64> %a1) {
 ; CHECK-LABEL: @test_demanded_elts_pclmulqdq_0(
@@ -78,3 +80,187 @@ define <2 x i64> @test_demanded_elts_pcl
   %1 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> <i64 1, i64 undef>, <2 x i64> <i64 1, i64 undef>, i8 17)
   ret <2 x i64> %1
 }
+
+define <4 x i64> @test_demanded_elts_pclmulqdq_256_0(<4 x i64> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_0(
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1:%.*]], i8 0)
+; CHECK-NEXT:    ret <4 x i64> [[RES]]
+;
+  %1 = insertelement <4 x i64> %a0, i64 1, i64 1
+  %2 = insertelement <4 x i64> %a1, i64 1, i64 1
+  %3 = insertelement <4 x i64> %1, i64 1, i64 3
+  %4 = insertelement <4 x i64> %2, i64 1, i64 3
+  %res = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %3, <4 x i64> %4, i8 0)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @test_demanded_elts_pclmulqdq_256_1(<4 x i64> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_1(
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> <i64 undef, i64 1, i64 undef, i64 1>, <4 x i64> [[A1:%.*]], i8 1)
+; CHECK-NEXT:    ret <4 x i64> [[RES]]
+;
+  %1 = insertelement <4 x i64> %a0, i64 1, i64 1
+  %2 = insertelement <4 x i64> %a1, i64 1, i64 1
+  %3 = insertelement <4 x i64> %1, i64 1, i64 3
+  %4 = insertelement <4 x i64> %2, i64 1, i64 3
+  %res = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %3, <4 x i64> %4, i8 1)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @test_demanded_elts_pclmulqdq_256_16(<4 x i64> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_16(
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> [[A0:%.*]], <4 x i64> <i64 undef, i64 1, i64 undef, i64 1>, i8 16)
+; CHECK-NEXT:    ret <4 x i64> [[RES]]
+;
+  %1 = insertelement <4 x i64> %a0, i64 1, i64 1
+  %2 = insertelement <4 x i64> %a1, i64 1, i64 1
+  %3 = insertelement <4 x i64> %1, i64 1, i64 3
+  %4 = insertelement <4 x i64> %2, i64 1, i64 3
+  %res = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %3, <4 x i64> %4, i8 16)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @test_demanded_elts_pclmulqdq_256_17(<4 x i64> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_17(
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> <i64 undef, i64 1, i64 undef, i64 1>, <4 x i64> <i64 undef, i64 1, i64 undef, i64 1>, i8 17)
+; CHECK-NEXT:    ret <4 x i64> [[RES]]
+;
+  %1 = insertelement <4 x i64> %a0, i64 1, i64 1
+  %2 = insertelement <4 x i64> %a1, i64 1, i64 1
+  %3 = insertelement <4 x i64> %1, i64 1, i64 3
+  %4 = insertelement <4 x i64> %2, i64 1, i64 3
+  %res = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %3, <4 x i64> %4, i8 17)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @test_demanded_elts_pclmulqdq_256_undef_0() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_undef_0(
+; CHECK-NEXT:    ret <4 x i64> zeroinitializer
+;
+  %1 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> <i64 undef, i64 1, i64 undef, i64 1>, <4 x i64> <i64 undef, i64 1, i64 undef, i64 1>, i8 0)
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @test_demanded_elts_pclmulqdq_256_undef_1() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_undef_1(
+; CHECK-NEXT:    ret <4 x i64> zeroinitializer
+;
+  %1 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> <i64 1, i64 undef, i64 1, i64 undef>, <4 x i64> <i64 undef, i64 1, i64 undef, i64 1>, i8 1)
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @test_demanded_elts_pclmulqdq_256_undef_16() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_undef_16(
+; CHECK-NEXT:    ret <4 x i64> zeroinitializer
+;
+  %1 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> <i64 undef, i64 1, i64 undef, i64 1>, <4 x i64> <i64 1, i64 undef, i64 1, i64 undef>, i8 16)
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @test_demanded_elts_pclmulqdq_256_undef_17() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_undef_17(
+; CHECK-NEXT:    ret <4 x i64> zeroinitializer
+;
+  %1 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> <i64 1, i64 undef, i64 1, i64 undef>, <4 x i64> <i64 1, i64 undef, i64 1, i64 undef>, i8 17)
+  ret <4 x i64> %1
+}
+
+define <8 x i64> @test_demanded_elts_pclmulqdq_512_0(<8 x i64> %a0, <8 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_0(
+; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1:%.*]], i8 0)
+; CHECK-NEXT:    ret <8 x i64> [[RES]]
+;
+  %1 = insertelement <8 x i64> %a0, i64 1, i64 1
+  %2 = insertelement <8 x i64> %a1, i64 1, i64 1
+  %3 = insertelement <8 x i64> %1, i64 1, i64 3
+  %4 = insertelement <8 x i64> %2, i64 1, i64 3
+  %5 = insertelement <8 x i64> %3, i64 1, i64 5
+  %6 = insertelement <8 x i64> %4, i64 1, i64 5
+  %7 = insertelement <8 x i64> %5, i64 1, i64 7
+  %8 = insertelement <8 x i64> %6, i64 1, i64 7
+  %res = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %7, <8 x i64> %8, i8 0)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_demanded_elts_pclmulqdq_512_1(<8 x i64> %a0, <8 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_1(
+; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> <i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1>, <8 x i64> [[A1:%.*]], i8 1)
+; CHECK-NEXT:    ret <8 x i64> [[RES]]
+;
+  %1 = insertelement <8 x i64> %a0, i64 1, i64 1
+  %2 = insertelement <8 x i64> %a1, i64 1, i64 1
+  %3 = insertelement <8 x i64> %1, i64 1, i64 3
+  %4 = insertelement <8 x i64> %2, i64 1, i64 3
+  %5 = insertelement <8 x i64> %3, i64 1, i64 5
+  %6 = insertelement <8 x i64> %4, i64 1, i64 5
+  %7 = insertelement <8 x i64> %5, i64 1, i64 7
+  %8 = insertelement <8 x i64> %6, i64 1, i64 7
+  %res = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %7, <8 x i64> %8, i8 1)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_demanded_elts_pclmulqdq_512_16(<8 x i64> %a0, <8 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_16(
+; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> [[A0:%.*]], <8 x i64> <i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1>, i8 16)
+; CHECK-NEXT:    ret <8 x i64> [[RES]]
+;
+  %1 = insertelement <8 x i64> %a0, i64 1, i64 1
+  %2 = insertelement <8 x i64> %a1, i64 1, i64 1
+  %3 = insertelement <8 x i64> %1, i64 1, i64 3
+  %4 = insertelement <8 x i64> %2, i64 1, i64 3
+  %5 = insertelement <8 x i64> %3, i64 1, i64 5
+  %6 = insertelement <8 x i64> %4, i64 1, i64 5
+  %7 = insertelement <8 x i64> %5, i64 1, i64 7
+  %8 = insertelement <8 x i64> %6, i64 1, i64 7
+  %res = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %7, <8 x i64> %8, i8 16)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_demanded_elts_pclmulqdq_512_17(<8 x i64> %a0, <8 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_17(
+; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> <i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1>, <8 x i64> <i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1>, i8 17)
+; CHECK-NEXT:    ret <8 x i64> [[RES]]
+;
+  %1 = insertelement <8 x i64> %a0, i64 1, i64 1
+  %2 = insertelement <8 x i64> %a1, i64 1, i64 1
+  %3 = insertelement <8 x i64> %1, i64 1, i64 3
+  %4 = insertelement <8 x i64> %2, i64 1, i64 3
+  %5 = insertelement <8 x i64> %3, i64 1, i64 5
+  %6 = insertelement <8 x i64> %4, i64 1, i64 5
+  %7 = insertelement <8 x i64> %5, i64 1, i64 7
+  %8 = insertelement <8 x i64> %6, i64 1, i64 7
+  %res = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %7, <8 x i64> %8, i8 17)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_demanded_elts_pclmulqdq_512_undef_0() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_undef_0(
+; CHECK-NEXT:    ret <8 x i64> zeroinitializer
+;
+  %1 = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> <i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1>, <8 x i64> <i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1>, i8 0)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @test_demanded_elts_pclmulqdq_512_undef_1() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_undef_1(
+; CHECK-NEXT:    ret <8 x i64> zeroinitializer
+;
+  %1 = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> <i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef>, <8 x i64> <i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1>, i8 1)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @test_demanded_elts_pclmulqdq_512_undef_16() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_undef_16(
+; CHECK-NEXT:    ret <8 x i64> zeroinitializer
+;
+  %1 = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> <i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1>, <8 x i64> <i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef>, i8 16)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @test_demanded_elts_pclmulqdq_512_undef_17() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_undef_17(
+; CHECK-NEXT:    ret <8 x i64> zeroinitializer
+;
+  %1 = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> <i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef>, <8 x i64> <i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef>, i8 17)
+  ret <8 x i64> %1
+}




More information about the llvm-commits mailing list