r229469 - [X86] Merge the 2 separate builtin handlers for PALIGNR into a single one that handles both.

Mon Feb 16 22:37:58 PST 2015

Author: ctopper
Date: Tue Feb 17 00:37:58 2015
New Revision: 229469

URL: http://llvm.org/viewvc/llvm-project?rev=229469&view=rev
Log:
[X86] Merge the 2 separate builtin handlers for PALIGNR into a single one that handles both.

Modified:
    cfe/trunk/lib/CodeGen/CGBuiltin.cpp
    cfe/trunk/test/CodeGen/sse-builtins.c

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=229469&r1=229468&r2=229469&view=diff
==============================================================================

--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Tue Feb 17 00:37:58 2015
@@ -5926,51 +5926,27 @@ Value *CodeGenFunction::EmitX86BuiltinEx
     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
     return Builder.CreateStore(Ops[1], Ops[0]);
   }
-  case X86::BI__builtin_ia32_palignr128: {
-    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
-
-    // If palignr is shifting the pair of input vectors less than 17 bytes,
-    // emit a shuffle instruction.
-    if (shiftVal <= 16) {
-      SmallVector<llvm::Constant*, 16> Indices;
-      for (unsigned i = 0; i != 16; ++i)
-        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
-
-      Value* SV = llvm::ConstantVector::get(Indices);
-      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
-    }
-
-    // If palignr is shifting the pair of input vectors more than 16 but less
-    // than 32 bytes, emit a logical right shift of the destination.
-    if (shiftVal < 32) {
-      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
-
-      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
-      Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);
-
-      // create i32 constant
-      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq);
-      return Builder.CreateCall(F, makeArrayRef(Ops.data(), 2), "palignr");
-    }
-
-    // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
-    return llvm::Constant::getNullValue(ConvertType(E->getType()));
-  }
+  case X86::BI__builtin_ia32_palignr128:
   case X86::BI__builtin_ia32_palignr256: {
-    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
 
-    // If palignr is shifting the pair of input vectors less than 17 bytes,
-    // emit a shuffle instruction.
-    if (shiftVal <= 16) {
+    unsigned NumElts =
+      cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
+    assert(NumElts % 16 == 0);
+    unsigned NumLanes = NumElts / 16;
+    unsigned NumLaneElts = NumElts / NumLanes;
+
+    // If palignr is shifting the pair of input vectors less than the size of
+    // a lane, emit a shuffle instruction.
+    if (ShiftVal <= NumLaneElts) {
       SmallVector<llvm::Constant*, 32> Indices;
       // 256-bit palignr operates on 128-bit lanes so we need to handle that
-      for (unsigned l = 0; l != 2; ++l) {
-        unsigned LaneStart = l * 16;
-        unsigned LaneEnd = (l+1) * 16;
-        for (unsigned i = 0; i != 16; ++i) {
-          unsigned Idx = shiftVal + i + LaneStart;
-          if (Idx >= LaneEnd) Idx += 16; // end of lane, switch operand
-          Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx));
+      for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+        for (unsigned i = 0; i != NumLaneElts; ++i) {
+          unsigned Idx = ShiftVal + i;
+          if (Idx >= NumLaneElts)
+            Idx += NumElts - NumLaneElts; // End of lane, switch operand.
+          Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l));
         }
       }
 
@@ -5978,21 +5954,32 @@ Value *CodeGenFunction::EmitX86BuiltinEx
       return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
     }
 
-    // If palignr is shifting the pair of input vectors more than 16 but less
-    // than 32 bytes, emit a logical right shift of the destination.
-    if (shiftVal < 32) {
-      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 4);
-
-      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
-      Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);
-
-      // create i32 constant
-      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_avx2_psrl_dq);
-      return Builder.CreateCall(F, makeArrayRef(Ops.data(), 2), "palignr");
+    // If palignr is shifting the pair of vectors more than the size of two
+    // lanes, emit zero.
+    if (ShiftVal >= (2 * NumLaneElts))
+      return llvm::Constant::getNullValue(ConvertType(E->getType()));
+
+    // If palignr is shifting the pair of input vectors more than one lane,
+    // but less than two lanes, emit a shift.
+    llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, NumElts/8);
+
+    Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
+    Ops[1] = llvm::ConstantInt::get(Int32Ty, (ShiftVal-NumLaneElts) * 8);
+
+    Intrinsic::ID ID;
+    switch (BuiltinID) {
+    default: llvm_unreachable("Unsupported intrinsic!");
+    case X86::BI__builtin_ia32_palignr128:
+      ID = Intrinsic::x86_sse2_psrl_dq;
+      break;
+    case X86::BI__builtin_ia32_palignr256:
+      ID = Intrinsic::x86_avx2_psrl_dq;
+      break;
     }
 
-    // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
-    return llvm::Constant::getNullValue(ConvertType(E->getType()));
+    // create i32 constant
+    llvm::Function *F = CGM.getIntrinsic(ID);
+    return Builder.CreateCall(F, makeArrayRef(Ops.data(), 2), "palignr");
   }
   case X86::BI__builtin_ia32_pslldqi256: {
     // Shift value is in bits so divide by 8.

Modified: cfe/trunk/test/CodeGen/sse-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse-builtins.c?rev=229469&r1=229468&r2=229469&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/sse-builtins.c (original)
+++ cfe/trunk/test/CodeGen/sse-builtins.c Tue Feb 17 00:37:58 2015
@@ -567,3 +567,13 @@ __m128 test_mm_bsrli_si128(__m128 a) {
   // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
   return _mm_bsrli_si128(a, 5);
 }
+
+__m128i test_mm_alignr_epi8(__m128i a, __m128i b) {
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
+  return _mm_alignr_epi8(a, b, 2);
+}
+
+__m128i test2_mm_alignr_epi8(__m128i a, __m128i b) {
+  // CHECK: @llvm.x86.sse2.psrl.dq({{.*}}, i32 8)
+  return _mm_alignr_epi8(a, b, 17);
+}