[PATCH] [X86] Replace (v)palignr intrinsics with generic shuffles (LLVM)

Thu Mar 12 10:50:29 PDT 2015

Hi craig.topper, andreadb, spatel, chandlerc,

The (v)palignr instructions are currently described using builtin intrinsics, although the x86 shuffle lowering code now correctly identifies them.

This patch auto-upgrades the palignr builtins to generic shuffles. I was also able to enable an old broken test (palignr-2.ll).

Companion patch to D8301

REPOSITORY
  rL LLVM

http://reviews.llvm.org/D8302

Files:
  lib/IR/AutoUpgrade.cpp
  test/CodeGen/X86/palignr-2.ll

Index: lib/IR/AutoUpgrade.cpp
===================================================================

--- lib/IR/AutoUpgrade.cpp
+++ lib/IR/AutoUpgrade.cpp
@@ -179,6 +179,8 @@
         Name == "x86.sse2.psrl.dq.bs" ||
         Name == "x86.avx2.psll.dq.bs" ||
         Name == "x86.avx2.psrl.dq.bs" ||
+        Name == "x86.ssse3.palign.r.128" ||
+        Name == "x86.avx2.palign.r.256" ||
         Name == "x86.sse41.pblendw" ||
         Name == "x86.sse41.blendpd" ||
         Name == "x86.sse41.blendps" ||
@@ -611,6 +613,50 @@
       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
       Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
                                        Shift);
+    } else if (Name == "llvm.x86.ssse3.palign.r.128" ||
+               Name == "llvm.x86.avx2.palign.r.256") {
+      Value *Op0 = CI->getArgOperand(0);
+      Value *Op1 = CI->getArgOperand(1);
+      unsigned ShiftVal = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+
+      VectorType *VecTy = cast<VectorType>(CI->getType());
+      unsigned VecBitWidth = VecTy->getBitWidth();
+      unsigned NumLanes = VecBitWidth / 128;
+      unsigned NumElts = NumLanes * 16;
+      unsigned NumLaneElts = NumElts / NumLanes;
+      assert(0 == (VecBitWidth % 128) && "Illegal vector width");
+      VectorType *ShufTy = VectorType::get(Type::getInt8Ty(C), NumElts);
+
+      // If palignr is shifting the pair of vectors more than the size of two
+      // lanes, emit zero.
+      if (ShiftVal >= (2 * NumLaneElts)) {
+        Rep = llvm::Constant::getNullValue(CI->getType());
+      } else {
+        // If palignr is shifting the pair of input vectors more than one lane,
+        // but less than two lanes, convert to shifting in zeroes.
+        if (ShiftVal > NumLaneElts) {
+          ShiftVal -= NumLaneElts;
+          Op0 = llvm::Constant::getNullValue(VecTy);
+        }
+
+        Op0 = Builder.CreateBitCast(Op0, ShufTy);
+        Op1 = Builder.CreateBitCast(Op1, ShufTy);
+
+        SmallVector<llvm::Constant*, 32> Indices;
+        // 256-bit palignr operates on 128-bit lanes so we need to handle that
+        for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+          for (unsigned i = 0; i != NumLaneElts; ++i) {
+            unsigned Idx = ShiftVal + i;
+            if (Idx >= NumLaneElts)
+              Idx += NumElts - NumLaneElts; // End of lane, switch operand.
+            Indices.push_back(llvm::ConstantInt::get(Type::getInt32Ty(C), Idx + l));
+          }
+        }
+
+        Value* SV = llvm::ConstantVector::get(Indices);
+        Rep = Builder.CreateShuffleVector(Op1, Op0, SV);
+        Rep = Builder.CreateBitCast(Rep, VecTy);
+      }
     } else if (Name == "llvm.x86.sse41.pblendw" ||
                Name == "llvm.x86.sse41.blendpd" ||
                Name == "llvm.x86.sse41.blendps" ||
Index: test/CodeGen/X86/palignr-2.ll
===================================================================
--- test/CodeGen/X86/palignr-2.ll
+++ test/CodeGen/X86/palignr-2.ll
@@ -8,8 +8,8 @@
 define void @t1(<2 x i64> %a, <2 x i64> %b) nounwind ssp {
 entry:
 ; CHECK-LABEL: t1:
-; palignr $3, %xmm1, %xmm0
-  %0 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %a, <2 x i64> %b, i8 24) nounwind readnone
+; CHECK: palignr $3, %xmm1, %xmm0
+  %0 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %a, <2 x i64> %b, i8 3) nounwind readnone
   store <2 x i64> %0, <2 x i64>* bitcast ([4 x i32]* @c to <2 x i64>*), align 16
   ret void
 }
@@ -19,10 +19,10 @@
 define void @t2() nounwind ssp {
 entry:
 ; CHECK-LABEL: t2:
-; palignr $4, _b, %xmm0
+; CHECK: palignr $4, _b, %xmm0
   %0 = load <2 x i64>, <2 x i64>* bitcast ([4 x i32]* @b to <2 x i64>*), align 16 ; <<2 x i64>> [#uses=1]
   %1 = load <2 x i64>, <2 x i64>* bitcast ([4 x i32]* @a to <2 x i64>*), align 16 ; <<2 x i64>> [#uses=1]
-  %2 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %1, <2 x i64> %0, i8 32) nounwind readnone
+  %2 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %1, <2 x i64> %0, i8 4) nounwind readnone
   store <2 x i64> %2, <2 x i64>* bitcast ([4 x i32]* @c to <2 x i64>*), align 16
   ret void
 }

EMAIL PREFERENCES
  http://reviews.llvm.org/settings/panel/emailpreferences/
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D8302.21852.patch
Type: text/x-patch
Size: 4286 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150312/16dfe473/attachment.bin>