[PATCH] [X86] Replace (v)palignr intrinsics with generic shuffles (LLVM)
Simon Pilgrim
llvm-dev at redking.me.uk
Thu Mar 12 10:50:29 PDT 2015
Hi craig.topper, andreadb, spatel, chandlerc,
The (v)palignr instructions are currently described using builtin intrinsics although the x86 shuffle lowering code now correctly identifies them.
This patch auto-upgrades the palignr builtins to generic shuffles. I was also able to enable an old broken test (palignr-2.ll).
Companion patch to D8301
REPOSITORY
rL LLVM
http://reviews.llvm.org/D8302
Files:
lib/IR/AutoUpgrade.cpp
test/CodeGen/X86/palignr-2.ll
Index: lib/IR/AutoUpgrade.cpp
===================================================================
--- lib/IR/AutoUpgrade.cpp
+++ lib/IR/AutoUpgrade.cpp
@@ -179,6 +179,8 @@
Name == "x86.sse2.psrl.dq.bs" ||
Name == "x86.avx2.psll.dq.bs" ||
Name == "x86.avx2.psrl.dq.bs" ||
+ Name == "x86.ssse3.palign.r.128" ||
+ Name == "x86.avx2.palign.r.256" ||
Name == "x86.sse41.pblendw" ||
Name == "x86.sse41.blendpd" ||
Name == "x86.sse41.blendps" ||
@@ -611,6 +613,50 @@
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
Shift);
+ } else if (Name == "llvm.x86.ssse3.palign.r.128" ||
+ Name == "llvm.x86.avx2.palign.r.256") {
+ Value *Op0 = CI->getArgOperand(0);
+ Value *Op1 = CI->getArgOperand(1);
+ unsigned ShiftVal = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+
+ VectorType *VecTy = cast<VectorType>(CI->getType());
+ unsigned VecBitWidth = VecTy->getBitWidth();
+ unsigned NumLanes = VecBitWidth / 128;
+ unsigned NumElts = NumLanes * 16;
+ unsigned NumLaneElts = NumElts / NumLanes;
+ assert(0 == (VecBitWidth % 128) && "Illegal vector width");
+ VectorType *ShufTy = VectorType::get(Type::getInt8Ty(C), NumElts);
+
+ // If palignr is shifting the pair of vectors more than the size of two
+ // lanes, emit zero.
+ if (ShiftVal >= (2 * NumLaneElts)) {
+ Rep = llvm::Constant::getNullValue(CI->getType());
+ } else {
+ // If palignr is shifting the pair of input vectors more than one lane,
+ // but less than two lanes, convert to shifting in zeroes.
+ if (ShiftVal > NumLaneElts) {
+ ShiftVal -= NumLaneElts;
+ Op0 = llvm::Constant::getNullValue(VecTy);
+ }
+
+ Op0 = Builder.CreateBitCast(Op0, ShufTy);
+ Op1 = Builder.CreateBitCast(Op1, ShufTy);
+
+ SmallVector<llvm::Constant*, 32> Indices;
+ // 256-bit palignr operates on 128-bit lanes so we need to handle that
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = 0; i != NumLaneElts; ++i) {
+ unsigned Idx = ShiftVal + i;
+ if (Idx >= NumLaneElts)
+ Idx += NumElts - NumLaneElts; // End of lane, switch operand.
+ Indices.push_back(llvm::ConstantInt::get(Type::getInt32Ty(C), Idx + l));
+ }
+ }
+
+ Value* SV = llvm::ConstantVector::get(Indices);
+ Rep = Builder.CreateShuffleVector(Op1, Op0, SV);
+ Rep = Builder.CreateBitCast(Rep, VecTy);
+ }
} else if (Name == "llvm.x86.sse41.pblendw" ||
Name == "llvm.x86.sse41.blendpd" ||
Name == "llvm.x86.sse41.blendps" ||
Index: test/CodeGen/X86/palignr-2.ll
===================================================================
--- test/CodeGen/X86/palignr-2.ll
+++ test/CodeGen/X86/palignr-2.ll
@@ -8,8 +8,8 @@
define void @t1(<2 x i64> %a, <2 x i64> %b) nounwind ssp {
entry:
; CHECK-LABEL: t1:
-; palignr $3, %xmm1, %xmm0
- %0 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %a, <2 x i64> %b, i8 24) nounwind readnone
+; CHECK: palignr $3, %xmm1, %xmm0
+ %0 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %a, <2 x i64> %b, i8 3) nounwind readnone
store <2 x i64> %0, <2 x i64>* bitcast ([4 x i32]* @c to <2 x i64>*), align 16
ret void
}
@@ -19,10 +19,10 @@
define void @t2() nounwind ssp {
entry:
; CHECK-LABEL: t2:
-; palignr $4, _b, %xmm0
+; CHECK: palignr $4, _b, %xmm0
%0 = load <2 x i64>, <2 x i64>* bitcast ([4 x i32]* @b to <2 x i64>*), align 16 ; <<2 x i64>> [#uses=1]
%1 = load <2 x i64>, <2 x i64>* bitcast ([4 x i32]* @a to <2 x i64>*), align 16 ; <<2 x i64>> [#uses=1]
- %2 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %1, <2 x i64> %0, i8 32) nounwind readnone
+ %2 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %1, <2 x i64> %0, i8 4) nounwind readnone
store <2 x i64> %2, <2 x i64>* bitcast ([4 x i32]* @c to <2 x i64>*), align 16
ret void
}
EMAIL PREFERENCES
http://reviews.llvm.org/settings/panel/emailpreferences/
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D8302.21852.patch
Type: text/x-patch
Size: 4286 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150312/16dfe473/attachment.bin>
More information about the llvm-commits
mailing list