[llvm] r289523 - [X86][InstCombine] Fix SimplifyDemandedVectorElts to handle frcz scalar intrinsics correctly.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 12 23:45:45 PST 2016
Author: ctopper
Date: Tue Dec 13 01:45:45 2016
New Revision: 289523
URL: http://llvm.org/viewvc/llvm-project?rev=289523&view=rev
Log:
[X86][InstCombine] Fix SimplifyDemandedVectorElts to handle frcz scalar intrinsics correctly.
Only the lowest element of the input vector is used, and only the lowest element of the result can be undef, since the upper elements are zeroed.
Have InstCombineCalls call SimplifyDemandedVectorElts for these intrinsics to reuse this support.
Modified:
llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
llvm/trunk/test/Transforms/InstCombine/x86-xop.ll
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=289523&r1=289522&r2=289523&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Tue Dec 13 01:45:45 2016
@@ -1837,6 +1837,19 @@ Instruction *InstCombiner::visitCallInst
break;
}
+ case Intrinsic::x86_xop_vfrcz_ss:
+ case Intrinsic::x86_xop_vfrcz_sd: {
+ unsigned VWidth = II->getType()->getVectorNumElements();
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
+ if (V != II)
+ return replaceInstUsesWith(*II, V);
+ return II;
+ }
+ break;
+ }
+
// Constant fold ashr( <A x Bi>, Ci ).
// Constant fold lshr( <A x Bi>, Ci ).
// Constant fold shl( <A x Bi>, Ci ).
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp?rev=289523&r1=289522&r2=289523&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp Tue Dec 13 01:45:45 2016
@@ -1264,9 +1264,14 @@ Value *InstCombiner::SimplifyDemandedVec
if (!DemandedElts[0])
return ConstantAggregateZero::get(II->getType());
+ // Only the lower element is used.
+ DemandedElts = 1;
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
UndefElts, Depth + 1);
if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+
+ // Only the lower element is undefined. The high elements are zero.
+ UndefElts = UndefElts[0];
break;
// Unary scalar-as-vector operations that work column-wise.
Modified: llvm/trunk/test/Transforms/InstCombine/x86-xop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-xop.ll?rev=289523&r1=289522&r2=289523&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-xop.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-xop.ll Tue Dec 13 01:45:45 2016
@@ -1,6 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
+define <2 x double> @test_vfrcz_sd(<2 x double> %a) {
+; CHECK-LABEL: @test_vfrcz_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %1)
+ ret <2 x double> %2
+}
+
define double @test_vfrcz_sd_0(double %a) {
; CHECK-LABEL: @test_vfrcz_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
@@ -26,6 +36,18 @@ define double @test_vfrcz_sd_1(double %a
ret double %4
}
+define <4 x float> @test_vfrcz_ss(<4 x float> %a) {
+; CHECK-LABEL: @test_vfrcz_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %3)
+ ret <4 x float> %4
+}
+
define float @test_vfrcz_ss_0(float %a) {
; CHECK-LABEL: @test_vfrcz_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
More information about the llvm-commits
mailing list