[PATCH] Optimize insertqi when we copy all the lower 64 bits.

Filipe Cabecinhas filcab+llvm.phabricator at gmail.com
Fri Apr 11 09:08:54 PDT 2014


Hi grosbach,

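Since the upper 64 bits of the destination register are undefined when
performing this operation, we can replace an insertqi call whose length
is 64 and whose index is 0 with a shufflevector that copies the low 64
bits of the second operand and leaves the upper half undef, and let the
optimizer figure out that only a copy is needed.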

http://reviews.llvm.org/D3357

Files:
  lib/Transforms/InstCombine/InstCombineCalls.cpp
  test/Transforms/InstCombine/2014-04-11-SSE4a-insertqi-64-bits.ll

Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -576,6 +576,24 @@
     break;
   }
 
+  case Intrinsic::x86_sse4a_insertqi: {
+    // insertqi x, y, 64, 0 can just copy y's lower bits and leave the top
+    // ones undef
+    if (ConstantInt *Width = dyn_cast<ConstantInt>(II->getArgOperand(2)))
+      if (ConstantInt *Start = dyn_cast<ConstantInt>(II->getArgOperand(3)))
+        if (Width->equalsInt(64) && Start->isZero()) {
+          Value *Vec = II->getArgOperand(1);
+          Value *Undef = UndefValue::get(Vec->getType());
+          const uint32_t Mask[] = { 0, 2 };
+          return ReplaceInstUsesWith(
+              CI,
+              Builder->CreateShuffleVector(
+                  Vec, Undef, ConstantDataVector::get(
+                                  II->getContext(), ArrayRef<uint32_t>(Mask))));
+        }
+    break;
+  }
+
   case Intrinsic::ppc_altivec_vperm:
     // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
     if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
Index: test/Transforms/InstCombine/2014-04-11-SSE4a-insertqi-64-bits.ll
===================================================================
--- /dev/null
+++ test/Transforms/InstCombine/2014-04-11-SSE4a-insertqi-64-bits.ll
@@ -0,0 +1,11 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK-NOT: insertqi
+
+define <2 x i64> @test(<2 x i64> %v, <2 x i64> %i) #0 {
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0)
+  ret <2 x i64> %1
+}
+
+; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi
+declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
+
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D3357.1.patch
Type: text/x-patch
Size: 1844 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140411/09eb89bf/attachment.bin>


More information about the llvm-commits mailing list