[llvm] r289411 - [InstCombine][XOP] The instructions for the scalar frcz intrinsics are defined to put 0 in the upper bits, not pass bits through like other intrinsics. So we should return a zero vector instead.

Sun Dec 11 14:32:38 PST 2016

Author: ctopper
Date: Sun Dec 11 16:32:38 2016
New Revision: 289411

URL: http://llvm.org/viewvc/llvm-project?rev=289411&view=rev
Log:
[InstCombine][XOP] The instructions for the scalar frcz intrinsics are defined to put 0 in the upper bits, not pass bits through like other intrinsics. So we should return a zero vector instead.

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
    llvm/trunk/test/Transforms/InstCombine/x86-xop.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp?rev=289411&r1=289410&r2=289411&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp Sun Dec 11 16:32:38 2016
@@ -1255,13 +1255,25 @@ Value *InstCombiner::SimplifyDemandedVec
     switch (II->getIntrinsicID()) {
     default: break;
 
+    case Intrinsic::x86_xop_vfrcz_ss:
+    case Intrinsic::x86_xop_vfrcz_sd:
+      // The instructions for these intrinsics are speced to zero upper bits not
+      // pass them through like other scalar intrinsics. So we shouldn't just
+      // use Arg0 if DemandedElts[0] is clear like we do for other intrinsics.
+      // Instead we should return a zero vector.
+      if (!DemandedElts[0])
+        return ConstantAggregateZero::get(II->getType());
+
+      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
+                                        UndefElts, Depth + 1);
+      if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+      break;
+
     // Unary scalar-as-vector operations that work column-wise.
     case Intrinsic::x86_sse_rcp_ss:
     case Intrinsic::x86_sse_rsqrt_ss:
     case Intrinsic::x86_sse_sqrt_ss:
     case Intrinsic::x86_sse2_sqrt_sd:
-    case Intrinsic::x86_xop_vfrcz_ss:
-    case Intrinsic::x86_xop_vfrcz_sd:
       TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
                                         UndefElts, Depth + 1);
       if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }

Modified: llvm/trunk/test/Transforms/InstCombine/x86-xop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-xop.ll?rev=289411&r1=289410&r2=289411&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-xop.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-xop.ll Sun Dec 11 16:32:38 2016
@@ -17,7 +17,7 @@ define double @test_vfrcz_sd_0(double %a
 
 define double @test_vfrcz_sd_1(double %a) {
 ; CHECK-LABEL: @test_vfrcz_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
+; CHECK-NEXT:    ret double 0.000000e+00
 ;
   %1 = insertelement <2 x double> undef, double %a, i32 0
   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
@@ -44,7 +44,7 @@ define float @test_vfrcz_ss_0(float %a)
 
 define float @test_vfrcz_ss_3(float %a) {
 ; CHECK-LABEL: @test_vfrcz_ss_3(
-; CHECK-NEXT:    ret float 3.000000e+00
+; CHECK-NEXT:    ret float 0.000000e+00
 ;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1