<html><body style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space; ">Doh! Who ratted me out? ;-)<div><br class="webkit-block-placeholder"></div><div>-bw</div><div><br><div><div>On Jan 4, 2008, at 3:43 PM, Evan Cheng wrote:</div><br class="Apple-interchange-newline"><blockquote type="cite"><div style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space; ">Bill? You *own* MMX. :-)<div><br class="webkit-block-placeholder"></div><div>Evan</div><div><br><div><div>On Jan 4, 2008, at 2:56 PM, Dale Johannesen wrote:</div><br class="Apple-interchange-newline"><blockquote type="cite"><div style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space; "><span class="Apple-style-span" style="font-family: Times; "><pre style="font-size: 14px; ">This patch does not make the corresponding change in llvm-gcc-4.*/gcc/config/i386/llvm-i386.cpp, with the effect that all these builtins are broken.  Please fix?</pre><pre style="font-size: 14px; ">(This shows up easily in the gcc testsuite, I'm a bit disappointed it took 3 weeks for somebody to notice.)</pre><blockquote type="cite"><pre>Author: andersca</pre><pre>Date: Fri Dec 14 00:38:54 2007
New Revision: 45027

URL: <a href="http://llvm.org/viewvc/llvm-project?rev=45027&view=rev">http://llvm.org/viewvc/llvm-project?rev=45027&view=rev</a>
Log:
All MMX shift instructions took a <2 x i32> vector as the shift amount parameter. Change this to be <1 x i64> instead, which matches the assembler instruction.

Modified:
    llvm/trunk/include/llvm/IntrinsicsX86.td
    llvm/trunk/lib/VMCore/AutoUpgrade.cpp
    llvm/trunk/test/Assembler/AutoUpgradeIntrinsics.ll

Modified: llvm/trunk/include/llvm/IntrinsicsX86.td
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IntrinsicsX86.td?rev=45027&r1=45026&r2=45027&view=diff">http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IntrinsicsX86.td?rev=45027&r1=45026&r2=45027&view=diff</a>

==============================================================================
--- llvm/trunk/include/llvm/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IntrinsicsX86.td Fri Dec 14 00:38:54 2007
@@ -767,30 +767,30 @@
   // Shift left logical
   def int_x86_mmx_psll_w : GCCBuiltin<"__builtin_ia32_psllw">,
               Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
-                         llvm_v2i32_ty], [IntrNoMem]>;
+                         llvm_v1i64_ty], [IntrNoMem]>;
   def int_x86_mmx_psll_d : GCCBuiltin<"__builtin_ia32_pslld">,
               Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
-                         llvm_v2i32_ty], [IntrNoMem]>;
+                         llvm_v1i64_ty], [IntrNoMem]>;
   def int_x86_mmx_psll_q : GCCBuiltin<"__builtin_ia32_psllq">,
               Intrinsic<[llvm_v1i64_ty, llvm_v1i64_ty,
-                         llvm_v2i32_ty], [IntrNoMem]>;
+                         llvm_v1i64_ty], [IntrNoMem]>;
 
   def int_x86_mmx_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw">,
               Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
-                         llvm_v2i32_ty], [IntrNoMem]>;
+                         llvm_v1i64_ty], [IntrNoMem]>;
   def int_x86_mmx_psrl_d : GCCBuiltin<"__builtin_ia32_psrld">,
               Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
-                         llvm_v2i32_ty], [IntrNoMem]>;
+                         llvm_v1i64_ty], [IntrNoMem]>;
   def int_x86_mmx_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq">,
               Intrinsic<[llvm_v1i64_ty,   llvm_v1i64_ty,
-                         llvm_v2i32_ty], [IntrNoMem]>;
+                         llvm_v1i64_ty], [IntrNoMem]>;
 
   def int_x86_mmx_psra_w : GCCBuiltin<"__builtin_ia32_psraw">,
               Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
-                         llvm_v2i32_ty], [IntrNoMem]>;
+                         llvm_v1i64_ty], [IntrNoMem]>;
   def int_x86_mmx_psra_d : GCCBuiltin<"__builtin_ia32_psrad">,
               Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
-                         llvm_v2i32_ty], [IntrNoMem]>;
+                         llvm_v1i64_ty], [IntrNoMem]>;
 }
 
 // Pack ops.

Modified: llvm/trunk/lib/VMCore/AutoUpgrade.cpp
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/AutoUpgrade.cpp?rev=45027&r1=45026&r2=45027&view=diff">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/AutoUpgrade.cpp?rev=45027&r1=45026&r2=45027&view=diff</a>

==============================================================================
--- llvm/trunk/lib/VMCore/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/VMCore/AutoUpgrade.cpp Fri Dec 14 00:38:54 2007
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/AutoUpgrade.h"
+#include "llvm/Constants.h"
 #include "llvm/Function.h"
 #include "llvm/Module.h"
 #include "llvm/Instructions.h"
@@ -110,6 +111,39 @@
     }
 
     break;
+  case 'x': 
+    // This fixes all MMX shift intrinsic instructions to take a
+    // v1i64 instead of a v2i32 as the second parameter.
+    if (Name.compare(5,10,"x86.mmx.ps",10) == 0 &&
+        (Name.compare(13,4,"psll", 4) == 0 ||
+         Name.compare(13,4,"psra", 4) == 0 ||
+         Name.compare(13,4,"psrl", 4) == 0)) {
+      
+      const llvm::Type *VT = VectorType::get(IntegerType::get(64), 1);
+      
+      // We don't have to do anything if the parameter already has
+      // the correct type.
+      if (FTy->getParamType(1) == VT)
+        break;
+      
+      //  We first need to change the name of the old (bad) intrinsic, because 
+      //  its type is incorrect, but we cannot overload that name. We 
+      //  arbitrarily unique it here allowing us to construct a correctly named 
+      //  and typed function below.
+      F->setName("");
+
+      assert(FTy->getNumParams() == 2 && "MMX shift intrinsics take 2 args!");
+      
+      //  Now construct the new intrinsic with the correct name and type. We 
+      //  leave the old function around in order to query its type, whatever it 
+      //  may be, and correctly convert up to the new type.
+      return cast<Function>(M->getOrInsertFunction(Name, 
+                                                   FTy->getReturnType(),
+                                                   FTy->getParamType(0),
+                                                   VT,
+                                                   (Type *)0));
+    }
+    break;
   }
 
   //  This may not belong here. This function is effectively being overloaded 
@@ -141,6 +175,40 @@
   
   switch(NewFn->getIntrinsicID()) {
   default:  assert(0 && "Unknown function for CallInst upgrade.");
+  case Intrinsic::x86_mmx_psll_d:
+  case Intrinsic::x86_mmx_psll_q:
+  case Intrinsic::x86_mmx_psll_w:
+  case Intrinsic::x86_mmx_psra_d:
+  case Intrinsic::x86_mmx_psra_w:
+  case Intrinsic::x86_mmx_psrl_d:
+  case Intrinsic::x86_mmx_psrl_q:
+  case Intrinsic::x86_mmx_psrl_w: {
+    SmallVector<Value*, 2> Operands;
+    
+    Operands.push_back(CI->getOperand(1));
+    
+    // Cast the second parameter to the correct type.
+    BitCastInst *BC = new BitCastInst(CI->getOperand(2), 
+                                      NewFn->getFunctionType()->getParamType(1),
+                                      "upgraded", CI);
+    Operands.push_back(BC);
+    
+    //  Construct a new CallInst
+    CallInst *NewCI = new CallInst(NewFn, Operands.begin(), Operands.end(), 
+                                   "upgraded."+CI->getName(), CI);
+    NewCI->setTailCall(CI->isTailCall());
+    NewCI->setCallingConv(CI->getCallingConv());
+    
+    //  Handle any uses of the old CallInst.
+    if (!CI->use_empty())
+      //  Replace all uses of the old call with the new cast which has the 
+      //  correct type.
+      CI->replaceAllUsesWith(NewCI);
+    
+    //  Clean up the old call now that it has been completely upgraded.
+    CI->eraseFromParent();
+    break;
+  }        
   case Intrinsic::ctlz:
   case Intrinsic::ctpop:
   case Intrinsic::cttz:

Modified: llvm/trunk/test/Assembler/AutoUpgradeIntrinsics.ll
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Assembler/AutoUpgradeIntrinsics.ll?rev=45027&r1=45026&r2=45027&view=diff">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Assembler/AutoUpgradeIntrinsics.ll?rev=45027&r1=45026&r2=45027&view=diff</a>

==============================================================================
--- llvm/trunk/test/Assembler/AutoUpgradeIntrinsics.ll (original)
+++ llvm/trunk/test/Assembler/AutoUpgradeIntrinsics.ll Fri Dec 14 00:38:54 2007
@@ -6,6 +6,8 @@
 ; RUN:   not grep {llvm\\.part\\.select\\.i\[0-9\]*\\.i\[0-9\]*}
 ; RUN: llvm-as < %s | llvm-dis | \
 ; RUN:   not grep {llvm\\.bswap\\.i\[0-9\]*\\.i\[0-9\]*}
+; RUN: llvm-as < %s | llvm-dis | \
+; RUN:   grep {llvm\\.x86\\.mmx\\.ps} | grep {2 x i32> | count 6
 
 declare i32 @llvm.ctpop.i28(i28 %val)
 declare i32 @llvm.cttz.i29(i29 %val)
@@ -50,3 +52,30 @@
   ret i32 %d
 }
 
+declare <4 x i16> @llvm.x86.mmx.psra.w(<4 x i16>, <2 x i32>) nounwind readnone 
+declare <4 x i16> @llvm.x86.mmx.psll.w(<4 x i16>, <2 x i32>) nounwind readnone 
+declare <4 x i16> @llvm.x86.mmx.psrl.w(<4 x i16>, <2 x i32>) nounwind readnone 
+define void @sh16(<4 x i16> %A, <2 x i32> %B) {
+       %r1 = call <4 x i16> @llvm.x86.mmx.psra.w( <4 x i16> %A, <2 x i32> %B )               ; <<4 x i16>> [#uses=0]
+       %r2 = call <4 x i16> @llvm.x86.mmx.psll.w( <4 x i16> %A, <2 x i32> %B )               ; <<4 x i16>> [#uses=0]
+       %r3 = call <4 x i16> @llvm.x86.mmx.psrl.w( <4 x i16> %A, <2 x i32> %B )               ; <<4 x i16>> [#uses=0]
+       ret void
+}
+
+declare <2 x i32> @llvm.x86.mmx.psra.d(<2 x i32>, <2 x i32>) nounwind readnone 
+declare <2 x i32> @llvm.x86.mmx.psll.d(<2 x i32>, <2 x i32>) nounwind readnone 
+declare <2 x i32> @llvm.x86.mmx.psrl.d(<2 x i32>, <2 x i32>) nounwind readnone 
+define void @sh32(<2 x i32> %A, <2 x i32> %B) {
+       %r1 = call <2 x i32> @llvm.x86.mmx.psra.d( <2 x i32> %A, <2 x i32> %B )               ; <<2 x i32>> [#uses=0]
+       %r2 = call <2 x i32> @llvm.x86.mmx.psll.d( <2 x i32> %A, <2 x i32> %B )               ; <<2 x i32>> [#uses=0]
+       %r3 = call <2 x i32> @llvm.x86.mmx.psrl.d( <2 x i32> %A, <2 x i32> %B )               ; <<2 x i32>> [#uses=0]
+       ret void
+}
+
+declare <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64>, <2 x i32>) nounwind readnone 
+declare <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64>, <2 x i32>) nounwind readnone 
+define void @sh64(<1 x i64> %A, <2 x i32> %B) {
+       %r1 = call <1 x i64> @llvm.x86.mmx.psll.q( <1 x i64> %A, <2 x i32> %B )               ; <<1 x i64>> [#uses=0]
+       %r2 = call <1 x i64> @llvm.x86.mmx.psrl.q( <1 x i64> %A, <2 x i32> %B )               ; <<1 x i64>> [#uses=0]
+       ret void
+}
<br></pre></blockquote></span></div>_______________________________________________<br>llvm-commits mailing list<br><a href="mailto:llvm-commits@cs.uiuc.edu">llvm-commits@cs.uiuc.edu</a><br><a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits">http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits</a><br></blockquote></div><br></div></div></blockquote></div><br></div></body></html>