[llvm] r352653 - SimplifyDemandedVectorElts for all intrinsics
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 30 11:21:11 PST 2019
Author: reames
Date: Wed Jan 30 11:21:11 2019
New Revision: 352653
URL: http://llvm.org/viewvc/llvm-project?rev=352653&view=rev
Log:
SimplifyDemandedVectorElts for all intrinsics
The point is that this simplifies integration of new intrinsics into SimplifiedDemandedVectorElts, and ensures we don't miss any existing ones.
This is intended to be NFC-ish, but as seen from the diffs, can produce slightly different output. This is due to order of transforms w/in instcombine resulting in two slightly different fixed points. That's something we should fix, but isn't a problem w/this patch per se.
Differential Revision: https://reviews.llvm.org/D57398
Modified:
llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/trunk/test/Transforms/InstCombine/X86/x86-avx512.ll
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=352653&r1=352652&r2=352653&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Wed Jan 30 11:21:11 2019
@@ -1868,6 +1868,19 @@ Instruction *InstCombiner::visitCallInst
if (Changed) return II;
}
+ // For vector result intrinsics, use the generic demanded vector support to
+ // simplify any operands before moving on to the per-intrinsic rules.
+ if (II->getType()->isVectorTy()) {
+ auto VWidth = II->getType()->getVectorNumElements();
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
+ if (V != II)
+ return replaceInstUsesWith(*II, V);
+ return II;
+ }
+ }
+
if (Instruction *I = SimplifyNVVMIntrinsic(II, *this))
return I;
@@ -2666,41 +2679,11 @@ Instruction *InstCombiner::visitCallInst
return replaceInstUsesWith(*II, V);
}
}
- LLVM_FALLTHROUGH;
+ break;
- // X86 scalar intrinsics simplified with SimplifyDemandedVectorElts.
- case Intrinsic::x86_avx512_mask_max_ss_round:
- case Intrinsic::x86_avx512_mask_min_ss_round:
- case Intrinsic::x86_avx512_mask_max_sd_round:
- case Intrinsic::x86_avx512_mask_min_sd_round:
- case Intrinsic::x86_sse_cmp_ss:
- case Intrinsic::x86_sse_min_ss:
- case Intrinsic::x86_sse_max_ss:
- case Intrinsic::x86_sse2_cmp_sd:
- case Intrinsic::x86_sse2_min_sd:
- case Intrinsic::x86_sse2_max_sd:
- case Intrinsic::x86_xop_vfrcz_ss:
- case Intrinsic::x86_xop_vfrcz_sd: {
- unsigned VWidth = II->getType()->getVectorNumElements();
- APInt UndefElts(VWidth, 0);
- APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
- if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
- if (V != II)
- return replaceInstUsesWith(*II, V);
- return II;
- }
- break;
- }
case Intrinsic::x86_sse41_round_ss:
case Intrinsic::x86_sse41_round_sd: {
- unsigned VWidth = II->getType()->getVectorNumElements();
- APInt UndefElts(VWidth, 0);
- APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
- if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
- if (V != II)
- return replaceInstUsesWith(*II, V);
- return II;
- } else if (Value *V = simplifyX86round(*II, Builder))
+ if (Value *V = simplifyX86round(*II, Builder))
return replaceInstUsesWith(*II, V);
break;
}
Modified: llvm/trunk/test/Transforms/InstCombine/X86/x86-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-avx512.ll?rev=352653&r1=352652&r2=352653&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/X86/x86-avx512.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/X86/x86-avx512.ll Wed Jan 30 11:21:11 2019
@@ -7,7 +7,7 @@ declare <4 x float> @llvm.x86.avx512.mas
define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_add_ss(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
; CHECK-NEXT: ret <4 x float> [[TMP4]]
@@ -38,7 +38,7 @@ define <4 x float> @test_add_ss_mask(<4
; CHECK-NEXT: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
; CHECK-NEXT: ret <4 x float> [[TMP8]]
@@ -149,7 +149,7 @@ declare <4 x float> @llvm.x86.avx512.mas
define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_sub_ss(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = fsub float [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
; CHECK-NEXT: ret <4 x float> [[TMP4]]
@@ -180,7 +180,7 @@ define <4 x float> @test_sub_ss_mask(<4
; CHECK-NEXT: [[TMP3:%.*]] = fsub float [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
; CHECK-NEXT: ret <4 x float> [[TMP8]]
@@ -291,7 +291,7 @@ declare <4 x float> @llvm.x86.avx512.mas
define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_mul_ss(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = fmul float [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
; CHECK-NEXT: ret <4 x float> [[TMP4]]
@@ -322,7 +322,7 @@ define <4 x float> @test_mul_ss_mask(<4
; CHECK-NEXT: [[TMP3:%.*]] = fmul float [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
; CHECK-NEXT: ret <4 x float> [[TMP8]]
@@ -433,7 +433,7 @@ declare <4 x float> @llvm.x86.avx512.mas
define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_div_ss(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = fdiv float [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
; CHECK-NEXT: ret <4 x float> [[TMP4]]
@@ -464,7 +464,7 @@ define <4 x float> @test_div_ss_mask(<4
; CHECK-NEXT: [[TMP3:%.*]] = fdiv float [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
; CHECK-NEXT: ret <4 x float> [[TMP8]]
More information about the llvm-commits
mailing list