[cfe-commits] r91264 - in /cfe/trunk: include/clang/Basic/BuiltinsX86.def lib/CodeGen/CGBuiltin.cpp lib/Headers/tmmintrin.h test/CodeGen/palignr.c

Eli Friedman eli.friedman at gmail.com
Mon Dec 14 13:49:09 PST 2009


On Sun, Dec 13, 2009 at 8:57 PM, Nate Begeman <natebegeman at mac.com> wrote:
> --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Sun Dec 13 22:57:03 2009
> @@ -807,10 +807,38 @@
>   }
>   case X86::BI__builtin_ia32_palignr128:
>   case X86::BI__builtin_ia32_palignr: {
> -    Function *F = CGM.getIntrinsic(BuiltinID == X86::BI__builtin_ia32_palignr128 ?
> -                                  Intrinsic::x86_ssse3_palign_r_128 :
> -                                  Intrinsic::x86_ssse3_palign_r);
> -    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size());
> +    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
> +
> +    // If palignr is shifting the pair of input vectors less than 17 bytes,
> +    // emit a shuffle instruction.
> +    if (shiftVal <= 16) {
> +      const llvm::Type *IntTy = llvm::Type::getInt32Ty(VMContext);
> +
> +      llvm::SmallVector<llvm::Constant*, 16> Indices;
> +      for (unsigned i = 0; i != 16; ++i)
> +        Indices.push_back(llvm::ConstantInt::get(IntTy, shiftVal + i));
> +
> +      Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
> +      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
> +    }
> +
> +    // If palignr is shifting the pair of input vectors more than 16 but less
> +    // than 32 bytes, emit a logical right shift of the destination.
> +    if (shiftVal < 32) {
> +      const llvm::Type *EltTy = llvm::Type::getInt64Ty(VMContext);
> +      const llvm::Type *VecTy = llvm::VectorType::get(EltTy, 2);
> +      const llvm::Type *IntTy = llvm::Type::getInt32Ty(VMContext);
> +
> +      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
> +      Ops[1] = llvm::ConstantInt::get(IntTy, (shiftVal-16) * 8);
> +
> +      // create i32 constant
> +      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq);
> +      return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "palignr");
> +    }

Two issues here:

1. The second operand of psrldq is in bytes, not bits, so the "* 8" in
"(shiftVal-16) * 8" looks wrong.
2. It would be better to simply emit this case as a shuffle too (the
first operand shuffled with a zero vector); we're generally trying to
get rid of unnecessary intrinsics, not add more uses of them.

-Eli




More information about the cfe-commits mailing list