[llvm] r207577 - Two fixes to the vpermilvar optimization.

Tue Apr 29 13:41:54 PDT 2014

Author: rafael
Date: Tue Apr 29 15:41:54 2014
New Revision: 207577

URL: http://llvm.org/viewvc/llvm-project?rev=207577&view=rev
Log:
Two fixes to the vpermilvar optimization.

The instcombine logic to handle vpermilvar's pd and 256 variants was incorrect.
The _256 variants have indexes into the individual 128-bit lanes, and in all
cases the unused bits of each index also have to be masked out.

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=207577&r1=207576&r2=207577&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Tue Apr 29 15:41:54 2014
@@ -725,9 +725,32 @@ Instruction *InstCombiner::visitCallInst
     // Convert vpermil* to shufflevector if the mask is constant.
     Value *V = II->getArgOperand(1);
     if (auto C = dyn_cast<ConstantDataVector>(V)) {
+      unsigned Size = C->getNumElements();
+      assert(Size == 8 || Size == 4 || Size == 2);
+      uint32_t Indexes[8];
+
+      // The intrinsics only read one or two bits, clear the rest.
+      for (unsigned I = 0; I < Size; ++I) {
+	uint32_t Index = C->getElementAsInteger(I) & 0x3;
+	if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd ||
+	    II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256)
+	  Index >>= 1;
+        Indexes[I] = Index;
+      }
+
+      // The _256 variants are a bit trickier since the mask bits always index
+      // into the corresponding 128 half. In order to convert to a generic
+      // shuffle, we have to make that explicit.
+      if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_ps_256 ||
+          II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) {
+        for (unsigned I = Size / 2; I < Size; ++I)
+          Indexes[I] += Size / 2;
+      }
+      auto NewC =
+          ConstantDataVector::get(C->getContext(), makeArrayRef(Indexes, Size));
       auto V1 = II->getArgOperand(0);
       auto V2 = UndefValue::get(V1->getType());
-      auto Shuffle = Builder->CreateShuffleVector(V1, V2, C);
+      auto Shuffle = Builder->CreateShuffleVector(V1, V2, NewC);
       return ReplaceInstUsesWith(CI, Shuffle);
     }
     break;

Modified: llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll?rev=207577&r1=207576&r2=207577&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll Tue Apr 29 15:41:54 2014
@@ -318,7 +318,7 @@ define <4 x float> @test_vpermilvar_ps(<
 declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>)
 define <8 x float> @test_vpermilvar_ps_256(<8 x float> %v) {
 ; CHECK-LABEL: @test_vpermilvar_ps_256(
-; CHECK: shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK: shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
   %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
   ret <8 x float> %a
 }
@@ -327,15 +327,15 @@ declare <2 x double> @llvm.x86.avx.vperm
 define <2 x double> @test_vpermilvar_pd(<2 x double> %v) {
 ; CHECK-LABEL: @test_vpermilvar_pd(
 ; CHECK: shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 0>
-  %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i32> <i32 1, i32 0>)
+  %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i32> <i32 2, i32 0>)
   ret <2 x double> %a
 }
 
 declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i32>)
 define <4 x double> @test_vpermilvar_pd_256(<4 x double> %v) {
 ; CHECK-LABEL: @test_vpermilvar_pd_256(
-; CHECK: shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-  %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
+; CHECK: shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i32> <i32 3, i32 1, i32 2, i32 0>)
   ret <4 x double> %a
 }