[cfe-commits] r91264 - in /cfe/trunk: include/clang/Basic/BuiltinsX86.def lib/CodeGen/CGBuiltin.cpp lib/Headers/tmmintrin.h test/CodeGen/palignr.c
Eli Friedman
eli.friedman at gmail.com
Mon Dec 14 13:49:09 PST 2009
On Sun, Dec 13, 2009 at 8:57 PM, Nate Begeman <natebegeman at mac.com> wrote:
> --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Sun Dec 13 22:57:03 2009
> @@ -807,10 +807,38 @@
> }
> case X86::BI__builtin_ia32_palignr128:
> case X86::BI__builtin_ia32_palignr: {
> - Function *F = CGM.getIntrinsic(BuiltinID == X86::BI__builtin_ia32_palignr128 ?
> - Intrinsic::x86_ssse3_palign_r_128 :
> - Intrinsic::x86_ssse3_palign_r);
> - return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size());
> + unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
> +
> + // If palignr is shifting the pair of input vectors less than 17 bytes,
> + // emit a shuffle instruction.
> + if (shiftVal <= 16) {
> + const llvm::Type *IntTy = llvm::Type::getInt32Ty(VMContext);
> +
> + llvm::SmallVector<llvm::Constant*, 16> Indices;
> + for (unsigned i = 0; i != 16; ++i)
> + Indices.push_back(llvm::ConstantInt::get(IntTy, shiftVal + i));
> +
> + Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
> + return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
> + }
> +
> + // If palignr is shifting the pair of input vectors more than 16 but less
> + // than 32 bytes, emit a logical right shift of the destination.
> + if (shiftVal < 32) {
> + const llvm::Type *EltTy = llvm::Type::getInt64Ty(VMContext);
> + const llvm::Type *VecTy = llvm::VectorType::get(EltTy, 2);
> + const llvm::Type *IntTy = llvm::Type::getInt32Ty(VMContext);
> +
> + Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
> + Ops[1] = llvm::ConstantInt::get(IntTy, (shiftVal-16) * 8);
> +
> + // create i32 constant
> + llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq);
> + return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "palignr");
> + }
Two issues here:
1. The second operand of psrldq is in bytes, not bits.
2. It would be better to simply emit this as a shuffle; we're generally trying to get rid of unnecessary intrinsics, not add more uses of them.
-Eli
More information about the cfe-commits mailing list