[llvm] r332202 - [X86] Extend instcombine folds for pclmulqdq intrinsics to the 256- and 512-bit versions.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun May 13 14:56:32 PDT 2018
Author: ctopper
Date: Sun May 13 14:56:32 2018
New Revision: 332202
URL: http://llvm.org/viewvc/llvm-project?rev=332202&view=rev
Log:
[X86] Extend instcombine folds for pclmulqdq intrinsics to the 256- and 512-bit versions.
Modified:
llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/trunk/test/Transforms/InstCombine/X86/clmulqdq.ll
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=332202&r1=332201&r2=332202&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Sun May 13 14:56:32 2018
@@ -2553,7 +2553,9 @@ Instruction *InstCombiner::visitCallInst
return replaceInstUsesWith(*II, V);
break;
- case Intrinsic::x86_pclmulqdq: {
+ case Intrinsic::x86_pclmulqdq:
+ case Intrinsic::x86_pclmulqdq_256:
+ case Intrinsic::x86_pclmulqdq_512: {
if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
unsigned Imm = C->getZExtValue();
@@ -2561,27 +2563,28 @@ Instruction *InstCombiner::visitCallInst
Value *Arg0 = II->getArgOperand(0);
Value *Arg1 = II->getArgOperand(1);
unsigned VWidth = Arg0->getType()->getVectorNumElements();
- APInt DemandedElts(VWidth, 0);
APInt UndefElts1(VWidth, 0);
- DemandedElts = (Imm & 0x01) ? 2 : 1;
- if (Value *V = SimplifyDemandedVectorElts(Arg0, DemandedElts,
+ APInt DemandedElts1 = APInt::getSplat(VWidth,
+ APInt(2, (Imm & 0x01) ? 2 : 1));
+ if (Value *V = SimplifyDemandedVectorElts(Arg0, DemandedElts1,
UndefElts1)) {
II->setArgOperand(0, V);
MadeChange = true;
}
APInt UndefElts2(VWidth, 0);
- DemandedElts = (Imm & 0x10) ? 2 : 1;
- if (Value *V = SimplifyDemandedVectorElts(Arg1, DemandedElts,
+ APInt DemandedElts2 = APInt::getSplat(VWidth,
+ APInt(2, (Imm & 0x10) ? 2 : 1));
+ if (Value *V = SimplifyDemandedVectorElts(Arg1, DemandedElts2,
UndefElts2)) {
II->setArgOperand(1, V);
MadeChange = true;
}
- // If both input elements are undef, the result is undef.
- if (UndefElts1[(Imm & 0x01) ? 1 : 0] ||
- UndefElts2[(Imm & 0x10) ? 1 : 0])
+ // If either input elements are undef, the result is zero.
+ if (DemandedElts1.isSubsetOf(UndefElts1) ||
+ DemandedElts2.isSubsetOf(UndefElts2))
return replaceInstUsesWith(*II,
ConstantAggregateZero::get(II->getType()));
Modified: llvm/trunk/test/Transforms/InstCombine/X86/clmulqdq.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/clmulqdq.ll?rev=332202&r1=332201&r2=332202&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/X86/clmulqdq.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/X86/clmulqdq.ll Sun May 13 14:56:32 2018
@@ -2,6 +2,8 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8)
+declare <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64>, <4 x i64>, i8)
+declare <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64>, <8 x i64>, i8)
define <2 x i64> @test_demanded_elts_pclmulqdq_0(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: @test_demanded_elts_pclmulqdq_0(
@@ -78,3 +80,187 @@ define <2 x i64> @test_demanded_elts_pcl
%1 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> <i64 1, i64 undef>, <2 x i64> <i64 1, i64 undef>, i8 17)
ret <2 x i64> %1
}
+
+define <4 x i64> @test_demanded_elts_pclmulqdq_256_0(<4 x i64> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_0(
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1:%.*]], i8 0)
+; CHECK-NEXT: ret <4 x i64> [[RES]]
+;
+ %1 = insertelement <4 x i64> %a0, i64 1, i64 1
+ %2 = insertelement <4 x i64> %a1, i64 1, i64 1
+ %3 = insertelement <4 x i64> %1, i64 1, i64 3
+ %4 = insertelement <4 x i64> %2, i64 1, i64 3
+ %res = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %3, <4 x i64> %4, i8 0)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_demanded_elts_pclmulqdq_256_1(<4 x i64> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_1(
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> <i64 undef, i64 1, i64 undef, i64 1>, <4 x i64> [[A1:%.*]], i8 1)
+; CHECK-NEXT: ret <4 x i64> [[RES]]
+;
+ %1 = insertelement <4 x i64> %a0, i64 1, i64 1
+ %2 = insertelement <4 x i64> %a1, i64 1, i64 1
+ %3 = insertelement <4 x i64> %1, i64 1, i64 3
+ %4 = insertelement <4 x i64> %2, i64 1, i64 3
+ %res = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %3, <4 x i64> %4, i8 1)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_demanded_elts_pclmulqdq_256_16(<4 x i64> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_16(
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> [[A0:%.*]], <4 x i64> <i64 undef, i64 1, i64 undef, i64 1>, i8 16)
+; CHECK-NEXT: ret <4 x i64> [[RES]]
+;
+ %1 = insertelement <4 x i64> %a0, i64 1, i64 1
+ %2 = insertelement <4 x i64> %a1, i64 1, i64 1
+ %3 = insertelement <4 x i64> %1, i64 1, i64 3
+ %4 = insertelement <4 x i64> %2, i64 1, i64 3
+ %res = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %3, <4 x i64> %4, i8 16)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_demanded_elts_pclmulqdq_256_17(<4 x i64> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_17(
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> <i64 undef, i64 1, i64 undef, i64 1>, <4 x i64> <i64 undef, i64 1, i64 undef, i64 1>, i8 17)
+; CHECK-NEXT: ret <4 x i64> [[RES]]
+;
+ %1 = insertelement <4 x i64> %a0, i64 1, i64 1
+ %2 = insertelement <4 x i64> %a1, i64 1, i64 1
+ %3 = insertelement <4 x i64> %1, i64 1, i64 3
+ %4 = insertelement <4 x i64> %2, i64 1, i64 3
+ %res = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %3, <4 x i64> %4, i8 17)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_demanded_elts_pclmulqdq_256_undef_0() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_undef_0(
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
+ %1 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> <i64 undef, i64 1, i64 undef, i64 1>, <4 x i64> <i64 undef, i64 1, i64 undef, i64 1>, i8 0)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @test_demanded_elts_pclmulqdq_256_undef_1() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_undef_1(
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
+ %1 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> <i64 1, i64 undef, i64 1, i64 undef>, <4 x i64> <i64 undef, i64 1, i64 undef, i64 1>, i8 1)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @test_demanded_elts_pclmulqdq_256_undef_16() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_undef_16(
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
+ %1 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> <i64 undef, i64 1, i64 undef, i64 1>, <4 x i64> <i64 1, i64 undef, i64 1, i64 undef>, i8 16)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @test_demanded_elts_pclmulqdq_256_undef_17() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_undef_17(
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
+ %1 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> <i64 1, i64 undef, i64 1, i64 undef>, <4 x i64> <i64 1, i64 undef, i64 1, i64 undef>, i8 17)
+ ret <4 x i64> %1
+}
+
+define <8 x i64> @test_demanded_elts_pclmulqdq_512_0(<8 x i64> %a0, <8 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_0(
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1:%.*]], i8 0)
+; CHECK-NEXT: ret <8 x i64> [[RES]]
+;
+ %1 = insertelement <8 x i64> %a0, i64 1, i64 1
+ %2 = insertelement <8 x i64> %a1, i64 1, i64 1
+ %3 = insertelement <8 x i64> %1, i64 1, i64 3
+ %4 = insertelement <8 x i64> %2, i64 1, i64 3
+ %5 = insertelement <8 x i64> %3, i64 1, i64 5
+ %6 = insertelement <8 x i64> %4, i64 1, i64 5
+ %7 = insertelement <8 x i64> %5, i64 1, i64 7
+ %8 = insertelement <8 x i64> %6, i64 1, i64 7
+ %res = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %7, <8 x i64> %8, i8 0)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_demanded_elts_pclmulqdq_512_1(<8 x i64> %a0, <8 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_1(
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> <i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1>, <8 x i64> [[A1:%.*]], i8 1)
+; CHECK-NEXT: ret <8 x i64> [[RES]]
+;
+ %1 = insertelement <8 x i64> %a0, i64 1, i64 1
+ %2 = insertelement <8 x i64> %a1, i64 1, i64 1
+ %3 = insertelement <8 x i64> %1, i64 1, i64 3
+ %4 = insertelement <8 x i64> %2, i64 1, i64 3
+ %5 = insertelement <8 x i64> %3, i64 1, i64 5
+ %6 = insertelement <8 x i64> %4, i64 1, i64 5
+ %7 = insertelement <8 x i64> %5, i64 1, i64 7
+ %8 = insertelement <8 x i64> %6, i64 1, i64 7
+ %res = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %7, <8 x i64> %8, i8 1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_demanded_elts_pclmulqdq_512_16(<8 x i64> %a0, <8 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_16(
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> [[A0:%.*]], <8 x i64> <i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1>, i8 16)
+; CHECK-NEXT: ret <8 x i64> [[RES]]
+;
+ %1 = insertelement <8 x i64> %a0, i64 1, i64 1
+ %2 = insertelement <8 x i64> %a1, i64 1, i64 1
+ %3 = insertelement <8 x i64> %1, i64 1, i64 3
+ %4 = insertelement <8 x i64> %2, i64 1, i64 3
+ %5 = insertelement <8 x i64> %3, i64 1, i64 5
+ %6 = insertelement <8 x i64> %4, i64 1, i64 5
+ %7 = insertelement <8 x i64> %5, i64 1, i64 7
+ %8 = insertelement <8 x i64> %6, i64 1, i64 7
+ %res = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %7, <8 x i64> %8, i8 16)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_demanded_elts_pclmulqdq_512_17(<8 x i64> %a0, <8 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_17(
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> <i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1>, <8 x i64> <i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1>, i8 17)
+; CHECK-NEXT: ret <8 x i64> [[RES]]
+;
+ %1 = insertelement <8 x i64> %a0, i64 1, i64 1
+ %2 = insertelement <8 x i64> %a1, i64 1, i64 1
+ %3 = insertelement <8 x i64> %1, i64 1, i64 3
+ %4 = insertelement <8 x i64> %2, i64 1, i64 3
+ %5 = insertelement <8 x i64> %3, i64 1, i64 5
+ %6 = insertelement <8 x i64> %4, i64 1, i64 5
+ %7 = insertelement <8 x i64> %5, i64 1, i64 7
+ %8 = insertelement <8 x i64> %6, i64 1, i64 7
+ %res = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %7, <8 x i64> %8, i8 17)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_demanded_elts_pclmulqdq_512_undef_0() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_undef_0(
+; CHECK-NEXT: ret <8 x i64> zeroinitializer
+;
+ %1 = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> <i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1>, <8 x i64> <i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1>, i8 0)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @test_demanded_elts_pclmulqdq_512_undef_1() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_undef_1(
+; CHECK-NEXT: ret <8 x i64> zeroinitializer
+;
+ %1 = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> <i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef>, <8 x i64> <i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1>, i8 1)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @test_demanded_elts_pclmulqdq_512_undef_16() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_undef_16(
+; CHECK-NEXT: ret <8 x i64> zeroinitializer
+;
+ %1 = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> <i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1>, <8 x i64> <i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef>, i8 16)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @test_demanded_elts_pclmulqdq_512_undef_17() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_undef_17(
+; CHECK-NEXT: ret <8 x i64> zeroinitializer
+;
+ %1 = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> <i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef>, <8 x i64> <i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef>, i8 17)
+ ret <8 x i64> %1
+}
More information about the llvm-commits
mailing list