[llvm-commits] working on generalized vector intrinsics

Fri Jul 13 13:35:30 PDT 2007

This patch adds a new ValueType for overloading on floating-point types,
fAny, and reworks some of the overloading framework to allow iAny and fAny
to be overloaded with vector types. It generalizes sqrt and powi, and adds
a few new intrinsics, sin, cos, exp, log, and pow, which demonstrate the
new functionality.

It's a work in progress and incomplete in several aspects; I'm posting it
now to see what feedback I can get on the general direction. However, it is
complete enough to pass the LLVM regression test, including the minimal
vector-intrinsics.ll test that it adds.

In its current form it introduces two incompatibilities. That's no problem
for my own current projects, so I'm interested to hear from anyone for whom
this is a concern.

First, the bswap, part_select and part_set intrinsics in LLVM currently
require some types to be specified redundantly, because the overloading
framework has no way to require that, for example, the return type is the
same as an argument type. My patch provides such a way, and I have updated
these intrinsics to use it. This incompatibility is not critical to the
overall patch though, and can be easily reverted if people prefer
compatibility to prettiness (;-)).

The other incompatibility is with ctpop, ctlz, and cttz. Instead of always
returning i32, these now have a return type which matches their operand type.
This is what the SelectionDAG framework currently has; this patch makes
the LLVM intrinsics work the same way. The primary reason for the change is
to make these intrinsics consistent when they are overloaded with vector
types, as it is very awkward to make them always return <N x i32>, and
somewhat awkward if the scalar versions are inconsistent with the vector
versions. This change would require a change to llvm-gcc and any other
front-end that uses these.

It's interesting to note that both of these changes reflect the original
versions of these intrinsics in LLVM, which were subsequently changed to
accommodate the overloading framework. For example, there is still text in
the LangRef.html for ctpop and friends that says
"The return type must match the argument type."

Dan

-- 
Dan Gohman, Cray Inc.
-------------- next part --------------
Index: test/Feature/llvm2cpp.ll
===================================================================

--- test/Feature/llvm2cpp.ll	(revision 39829)
+++ test/Feature/llvm2cpp.ll	(working copy)
@@ -403,30 +403,6 @@
 
 declare void @llvm.prefetch(i8*, i32, i32)
 
-declare i32 @upgrd.rm.llvm.ctpop.i8(i8)
-
-declare i32 @upgrd.rm.llvm.ctpop.i16(i16)
-
-declare i32 @upgrd.rm.llvm.ctpop.i32(i32)
-
-declare i32 @upgrd.rm.llvm.ctpop.i64(i64)
-
-declare i32 @upgrd.rm.llvm.cttz.i8(i8)
-
-declare i32 @upgrd.rm.llvm.cttz.i16(i16)
-
-declare i32 @upgrd.rm.llvm.cttz.i32(i32)
-
-declare i32 @upgrd.rm.llvm.cttz.i64(i64)
-
-declare i32 @upgrd.rm.llvm.ctlz.i8(i8)
-
-declare i32 @upgrd.rm.llvm.ctlz.i16(i16)
-
-declare i32 @upgrd.rm.llvm.ctlz.i32(i32)
-
-declare i32 @upgrd.rm.llvm.ctlz.i64(i64)
-
 declare float @llvm.sqrt.f32(float)
 
 declare double @llvm.sqrt.f64(double)
@@ -437,56 +413,44 @@
 	call void @llvm.prefetch( i8* null, i32 1, i32 3 )
 	call float @llvm.sqrt.f32( float 5.000000e+00 )		; <float>:3 [#uses=0]
 	call double @llvm.sqrt.f64( double 6.000000e+00 )		; <double>:4 [#uses=0]
-	call i32 @llvm.ctpop.i8( i8 10 )		; <i32>:5 [#uses=1]
-	bitcast i32 %5 to i32		; <i32>:6 [#uses=0]
-	call i32 @llvm.ctpop.i16( i16 11 )		; <i32>:7 [#uses=1]
-	bitcast i32 %7 to i32		; <i32>:8 [#uses=0]
+	call i8 @llvm.ctpop.i8( i8 10 )		; <i32>:5 [#uses=1]
+	call i16 @llvm.ctpop.i16( i16 11 )		; <i32>:7 [#uses=1]
 	call i32 @llvm.ctpop.i32( i32 12 )		; <i32>:9 [#uses=1]
-	bitcast i32 %9 to i32		; <i32>:10 [#uses=0]
-	call i32 @llvm.ctpop.i64( i64 13 )		; <i32>:11 [#uses=1]
-	bitcast i32 %11 to i32		; <i32>:12 [#uses=0]
-	call i32 @llvm.ctlz.i8( i8 14 )		; <i32>:13 [#uses=1]
-	bitcast i32 %13 to i32		; <i32>:14 [#uses=0]
-	call i32 @llvm.ctlz.i16( i16 15 )		; <i32>:15 [#uses=1]
-	bitcast i32 %15 to i32		; <i32>:16 [#uses=0]
+	call i64 @llvm.ctpop.i64( i64 13 )		; <i32>:11 [#uses=1]
+	call i8 @llvm.ctlz.i8( i8 14 )		; <i32>:13 [#uses=1]
+	call i16 @llvm.ctlz.i16( i16 15 )		; <i32>:15 [#uses=1]
 	call i32 @llvm.ctlz.i32( i32 16 )		; <i32>:17 [#uses=1]
-	bitcast i32 %17 to i32		; <i32>:18 [#uses=0]
-	call i32 @llvm.ctlz.i64( i64 17 )		; <i32>:19 [#uses=1]
-	bitcast i32 %19 to i32		; <i32>:20 [#uses=0]
-	call i32 @llvm.cttz.i8( i8 18 )		; <i32>:21 [#uses=1]
-	bitcast i32 %21 to i32		; <i32>:22 [#uses=0]
-	call i32 @llvm.cttz.i16( i16 19 )		; <i32>:23 [#uses=1]
-	bitcast i32 %23 to i32		; <i32>:24 [#uses=0]
+	call i64 @llvm.ctlz.i64( i64 17 )		; <i32>:19 [#uses=1]
+	call i8 @llvm.cttz.i8( i8 18 )		; <i32>:21 [#uses=1]
+	call i16 @llvm.cttz.i16( i16 19 )		; <i32>:23 [#uses=1]
 	call i32 @llvm.cttz.i32( i32 20 )		; <i32>:25 [#uses=1]
-	bitcast i32 %25 to i32		; <i32>:26 [#uses=0]
-	call i32 @llvm.cttz.i64( i64 21 )		; <i32>:27 [#uses=1]
-	bitcast i32 %27 to i32		; <i32>:28 [#uses=0]
+	call i64 @llvm.cttz.i64( i64 21 )		; <i32>:27 [#uses=1]
 	ret void
 }
 
-declare i32 @llvm.ctpop.i8(i8)
+declare i8 @llvm.ctpop.i8(i8)
 
-declare i32 @llvm.ctpop.i16(i16)
+declare i16 @llvm.ctpop.i16(i16)
 
 declare i32 @llvm.ctpop.i32(i32)
 
-declare i32 @llvm.ctpop.i64(i64)
+declare i64 @llvm.ctpop.i64(i64)
 
-declare i32 @llvm.ctlz.i8(i8)
+declare i8 @llvm.ctlz.i8(i8)
 
-declare i32 @llvm.ctlz.i16(i16)
+declare i16 @llvm.ctlz.i16(i16)
 
 declare i32 @llvm.ctlz.i32(i32)
 
-declare i32 @llvm.ctlz.i64(i64)
+declare i64 @llvm.ctlz.i64(i64)
 
-declare i32 @llvm.cttz.i8(i8)
+declare i8 @llvm.cttz.i8(i8)
 
-declare i32 @llvm.cttz.i16(i16)
+declare i16 @llvm.cttz.i16(i16)
 
 declare i32 @llvm.cttz.i32(i32)
 
-declare i32 @llvm.cttz.i64(i64)
+declare i64 @llvm.cttz.i64(i64)
 
 ; ModuleID = 'packed.ll'
 @foo1 = external global <4 x float>		; <<4 x float>*> [#uses=2]
Index: test/Transforms/InstCombine/bitcount.ll
===================================================================
--- test/Transforms/InstCombine/bitcount.ll	(revision 39829)
+++ test/Transforms/InstCombine/bitcount.ll	(working copy)
@@ -3,15 +3,17 @@
 ; RUN: llvm-as < %s | opt -instcombine | llvm-dis | \
 ; RUN:   grep -v declare | not grep llvm.ct
 
-declare i32 @llvm.ctpop.i31(i31 %val) 
+declare i31 @llvm.ctpop.i31(i31 %val) 
 declare i32 @llvm.cttz.i32(i32 %val) 
-declare i32 @llvm.ctlz.i33(i33 %val) 
+declare i33 @llvm.ctlz.i33(i33 %val) 
 
 define i32 @test(i32 %A) {
-  %c1 = call i32 @llvm.ctpop.i31(i31 12415124)
+  %c1 = call i31 @llvm.ctpop.i31(i31 12415124)
   %c2 = call i32 @llvm.cttz.i32(i32 87359874)
-  %c3 = call i32 @llvm.ctlz.i33(i33 87359874)
-  %r1 = add i32 %c1, %c2
-  %r2 = add i32 %r1, %c3
+  %c3 = call i33 @llvm.ctlz.i33(i33 87359874)
+  %t1 = zext i31 %c1 to i32
+  %t3 = trunc i33 %c3 to i32
+  %r1 = add i32 %t1, %c2
+  %r2 = add i32 %r1, %t3
   ret i32 %r2
 }
Index: test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
===================================================================
--- test/CodeGen/PowerPC/2007-03-24-cntlzd.ll	(revision 39829)
+++ test/CodeGen/PowerPC/2007-03-24-cntlzd.ll	(working copy)
@@ -2,10 +2,11 @@
 
 define i32 @_ZNK4llvm5APInt17countLeadingZerosEv(i64 *%t) {
         %tmp19 = load i64* %t
-        %tmp23 = tail call i32 @llvm.ctlz.i64( i64 %tmp19 )             ; <i64> [#uses=1]
+        %tmp22 = tail call i64 @llvm.ctlz.i64( i64 %tmp19 )             ; <i64> [#uses=1]
+        %tmp23 = trunc i64 %tmp22 to i32
         %tmp89 = add i32 %tmp23, -64          ; <i32> [#uses=1]
         %tmp90 = add i32 %tmp89, 0            ; <i32> [#uses=1]
         ret i32 %tmp90
 }
 
-declare i32 @llvm.ctlz.i64(i64)
+declare i64 @llvm.ctlz.i64(i64)
Index: test/CodeGen/Alpha/ctlz.ll
===================================================================
--- test/CodeGen/Alpha/ctlz.ll	(revision 39829)
+++ test/CodeGen/Alpha/ctlz.ll	(working copy)
@@ -5,10 +5,11 @@
 ; RUN: llvm-as < %s | llc -march=alpha -mcpu=ev56 | not grep -i ctlz
 ; RUN: llvm-as < %s | llc -march=alpha -mattr=-CIX | not grep -i ctlz
 
-declare i32 @llvm.ctlz.i8(i8)
+declare i8 @llvm.ctlz.i8(i8)
 
 define i32 @bar(i8 %x) {
 entry:
-	%tmp.1 = call i32 @llvm.ctlz.i8( i8 %x ) 
-	ret i32 %tmp.1
+	%tmp.1 = call i8 @llvm.ctlz.i8( i8 %x ) 
+	%tmp.2 = sext i8 %tmp.1 to i32
+	ret i32 %tmp.2
 }
Index: test/CodeGen/X86/vector-intrinsics.ll
===================================================================
--- test/CodeGen/X86/vector-intrinsics.ll	(revision 0)
+++ test/CodeGen/X86/vector-intrinsics.ll	(revision 0)
@@ -0,0 +1,17 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep {sqrtpd %xmm0, %xmm0}
+; RUN: llvm-as < %s | llc -march=x86-64 | grep {sqrtpd %xmm1, %xmm1}
+; RUN: llvm-as < %s | llc -march=x86-64 | grep {call sin} | wc -l | grep 2
+
+declare <2 x double> @llvm.sin.v2f64(<2 x double> %x);
+declare <4 x double> @llvm.sqrt.v4f64(<4 x double> %x);
+
+define <2 x double> @foo(<2 x double> %x)
+{
+  %y = call <2 x double> @llvm.sin.v2f64(<2 x double> %x)
+  ret <2 x double> %y
+}
+define <4 x double> @goo(<4 x double> %x)
+{
+  %y = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %x)
+  ret <4 x double> %y
+}
Index: test/CodeGen/Generic/bit-intrinsics.ll
===================================================================
--- test/CodeGen/Generic/bit-intrinsics.ll	(revision 39829)
+++ test/CodeGen/Generic/bit-intrinsics.ll	(working copy)
@@ -3,21 +3,21 @@
 ; RUN: llvm-as < %s > %t.bc
 ; RUN: lli --force-interpreter=true %t.bc
 
-declare i32 @llvm.part.set.i32.i32.i32(i32 %x, i32 %rep, i32 %hi, i32 %lo)
-declare i16 @llvm.part.set.i16.i16.i16(i16 %x, i16 %rep, i32 %hi, i32 %lo)
+declare i32 @llvm.part.set.i32.i32(i32 %x, i32 %rep, i32 %hi, i32 %lo)
+declare i16 @llvm.part.set.i16.i16(i16 %x, i16 %rep, i32 %hi, i32 %lo)
 define i32 @test_part_set(i32 %A, i16 %B) {
-  %a = call i32 @llvm.part.set.i32.i32.i32(i32 %A, i32 27, i32 8, i32 0)
-  %b = call i16 @llvm.part.set.i16.i16.i16(i16 %B, i16 27, i32 8, i32 0)
+  %a = call i32 @llvm.part.set.i32.i32(i32 %A, i32 27, i32 8, i32 0)
+  %b = call i16 @llvm.part.set.i16.i16(i16 %B, i16 27, i32 8, i32 0)
   %c = zext i16 %b to i32
   %d = add i32 %a, %c
   ret i32 %d
 }
 
-declare i32 @llvm.part.select.i32.i32(i32 %x, i32 %hi, i32 %lo)
-declare i16 @llvm.part.select.i16.i16(i16 %x, i32 %hi, i32 %lo)
+declare i32 @llvm.part.select.i32(i32 %x, i32 %hi, i32 %lo)
+declare i16 @llvm.part.select.i16(i16 %x, i32 %hi, i32 %lo)
 define i32 @test_part_select(i32 %A, i16 %B) {
-  %a = call i32 @llvm.part.select.i32.i32(i32 %A, i32 8, i32 0)
-  %b = call i16 @llvm.part.select.i16.i16(i16 %B, i32 8, i32 0)
+  %a = call i32 @llvm.part.select.i32(i32 %A, i32 8, i32 0)
+  %b = call i16 @llvm.part.select.i16(i16 %B, i32 8, i32 0)
   %c = zext i16 %b to i32
   %d = add i32 %a, %c
   ret i32 %d
Index: include/llvm/Intrinsics.td
===================================================================
--- include/llvm/Intrinsics.td	(revision 39829)
+++ include/llvm/Intrinsics.td	(working copy)
@@ -52,59 +52,49 @@
 // Types used by intrinsics.
 //===----------------------------------------------------------------------===//
 
-class LLVMType<ValueType vt, string typeval> {
+class LLVMType<ValueType vt> {
   ValueType VT = vt;
-  string TypeVal = typeval;
 }
 
-class LLVMIntegerType<ValueType VT, int width>
-  : LLVMType<VT, "Type::IntegerTyID"> {
-  int Width = width;
-}
-
-class LLVMVectorType<ValueType VT, int numelts, LLVMType elty>
-  : LLVMType<VT, "Type::VectorTyID">{
-  int NumElts = numelts;
-  LLVMType ElTy = elty;
-} 
-
 class LLVMPointerType<LLVMType elty>
-  : LLVMType<iPTR, "Type::PointerTyID">{
+  : LLVMType<iPTR>{
   LLVMType ElTy = elty;
 } 
 
-class LLVMEmptyStructType
-  : LLVMType<OtherVT, "Type::StructTyID">{
+class LLVMMatchType<int num>
+  : LLVMType<OtherVT>{
+  int Number = num;
 } 
 
-def llvm_void_ty       : LLVMType<isVoid, "Type::VoidTyID">;
-def llvm_int_ty        : LLVMIntegerType<iAny, 0>;
-def llvm_i1_ty         : LLVMIntegerType<i1 , 1>;
-def llvm_i8_ty         : LLVMIntegerType<i8 , 8>;
-def llvm_i16_ty        : LLVMIntegerType<i16, 16>;
-def llvm_i32_ty        : LLVMIntegerType<i32, 32>;
-def llvm_i64_ty        : LLVMIntegerType<i64, 64>;
-def llvm_float_ty      : LLVMType<f32, "Type::FloatTyID">;
-def llvm_double_ty     : LLVMType<f64, "Type::DoubleTyID">;
+def llvm_void_ty       : LLVMType<isVoid>;
+def llvm_anyint_ty     : LLVMType<iAny>;
+def llvm_anyfloat_ty   : LLVMType<fAny>;
+def llvm_i1_ty         : LLVMType<i1>;
+def llvm_i8_ty         : LLVMType<i8>;
+def llvm_i16_ty        : LLVMType<i16>;
+def llvm_i32_ty        : LLVMType<i32>;
+def llvm_i64_ty        : LLVMType<i64>;
+def llvm_f32_ty        : LLVMType<f32>;
+def llvm_f64_ty        : LLVMType<f64>;
 def llvm_ptr_ty        : LLVMPointerType<llvm_i8_ty>;             // i8*
 def llvm_ptrptr_ty     : LLVMPointerType<llvm_ptr_ty>;            // i8**
-def llvm_empty_ty      : LLVMEmptyStructType;                     // { }
+def llvm_empty_ty      : LLVMType<OtherVT>;                       // { }
 def llvm_descriptor_ty : LLVMPointerType<llvm_empty_ty>;          // { }*
 
-def llvm_v16i8_ty      : LLVMVectorType<v16i8,16, llvm_i8_ty>;    // 16 x i8
-def llvm_v8i16_ty      : LLVMVectorType<v8i16, 8, llvm_i16_ty>;   //  8 x i16
-def llvm_v2i64_ty      : LLVMVectorType<v2i64, 2, llvm_i64_ty>;   //  2 x i64
-def llvm_v2i32_ty      : LLVMVectorType<v2i32, 2, llvm_i32_ty>;   //  2 x i32
-def llvm_v1i64_ty      : LLVMVectorType<v1i64, 1, llvm_i64_ty>;   //  1 x i64
-def llvm_v4i32_ty      : LLVMVectorType<v4i32, 4, llvm_i32_ty>;   //  4 x i32
-def llvm_v4f32_ty      : LLVMVectorType<v4f32, 4, llvm_float_ty>; //  4 x float
-def llvm_v2f64_ty      : LLVMVectorType<v2f64, 2, llvm_double_ty>;//  2 x double
+def llvm_v16i8_ty      : LLVMType<v16i8>;    // 16 x i8
+def llvm_v8i16_ty      : LLVMType<v8i16>;    //  8 x i16
+def llvm_v2i64_ty      : LLVMType<v2i64>;    //  2 x i64
+def llvm_v2i32_ty      : LLVMType<v2i32>;    //  2 x i32
+def llvm_v1i64_ty      : LLVMType<v1i64>;    //  1 x i64
+def llvm_v4i32_ty      : LLVMType<v4i32>;    //  4 x i32
+def llvm_v4f32_ty      : LLVMType<v4f32>;    //  4 x float
+def llvm_v2f64_ty      : LLVMType<v2f64>;    //  2 x double
 
 // MMX Vector Types
-def llvm_v8i8_ty       : LLVMVectorType<v8i8,  8, llvm_i8_ty>;    //  8 x i8
-def llvm_v4i16_ty      : LLVMVectorType<v4i16, 4, llvm_i16_ty>;   //  4 x i16
+def llvm_v8i8_ty       : LLVMType<v8i8>;     //  8 x i8
+def llvm_v4i16_ty      : LLVMType<v4i16>;    //  4 x i16
 
-def llvm_vararg_ty     : LLVMType<isVoid, "...">; // vararg
+def llvm_vararg_ty     : LLVMType<isVoid>;   // this means vararg here
 
 //===----------------------------------------------------------------------===//
 // Intrinsic Definitions.
@@ -185,11 +175,14 @@
 }
 
 let Properties = [IntrNoMem] in {
-  def int_sqrt_f32 : Intrinsic<[llvm_float_ty , llvm_float_ty]>;
-  def int_sqrt_f64 : Intrinsic<[llvm_double_ty, llvm_double_ty]>;
-
-  def int_powi_f32 : Intrinsic<[llvm_float_ty , llvm_float_ty, llvm_i32_ty]>;
-  def int_powi_f64 : Intrinsic<[llvm_double_ty, llvm_double_ty, llvm_i32_ty]>;
+  def int_sqrt     : Intrinsic<[llvm_anyfloat_ty, LLVMMatchType<0>]>;
+  def int_powi     : Intrinsic<[llvm_anyfloat_ty, LLVMMatchType<0>, llvm_i32_ty]>;
+  def int_sin      : Intrinsic<[llvm_anyfloat_ty, LLVMMatchType<0>]>;
+  def int_cos      : Intrinsic<[llvm_anyfloat_ty, LLVMMatchType<0>]>;
+  def int_exp      : Intrinsic<[llvm_anyfloat_ty, LLVMMatchType<0>]>;
+  def int_log      : Intrinsic<[llvm_anyfloat_ty, LLVMMatchType<0>]>;
+  def int_pow      : Intrinsic<[llvm_anyfloat_ty,
+                                LLVMMatchType<0>, LLVMMatchType<0>]>;
 }
 
 // NOTE: these are internal interfaces.
@@ -203,14 +196,14 @@
 
 // None of these intrinsics accesses memory at all.
 let Properties = [IntrNoMem] in {
-  def int_bswap: Intrinsic<[llvm_int_ty, llvm_int_ty]>;
-  def int_ctpop: Intrinsic<[llvm_i32_ty, llvm_int_ty]>;
-  def int_ctlz : Intrinsic<[llvm_i32_ty, llvm_int_ty]>;
-  def int_cttz : Intrinsic<[llvm_i32_ty, llvm_int_ty]>;
+  def int_bswap: Intrinsic<[llvm_anyint_ty, LLVMMatchType<0>]>;
+  def int_ctpop: Intrinsic<[llvm_anyint_ty, LLVMMatchType<0>]>;
+  def int_ctlz : Intrinsic<[llvm_anyint_ty, LLVMMatchType<0>]>;
+  def int_cttz : Intrinsic<[llvm_anyint_ty, LLVMMatchType<0>]>;
   def int_part_select : 
-     Intrinsic<[llvm_int_ty, llvm_int_ty, llvm_i32_ty, llvm_i32_ty]>;
+     Intrinsic<[llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty]>;
   def int_part_set :
-     Intrinsic<[llvm_int_ty, llvm_int_ty, llvm_int_ty, llvm_i32_ty, 
+     Intrinsic<[llvm_anyint_ty, LLVMMatchType<0>, llvm_anyint_ty, llvm_i32_ty, 
                 llvm_i32_ty]>;
 }
 
Index: include/llvm/CodeGen/RuntimeLibcalls.h
===================================================================
--- include/llvm/CodeGen/RuntimeLibcalls.h	(revision 39829)
+++ include/llvm/CodeGen/RuntimeLibcalls.h	(working copy)
@@ -62,6 +62,12 @@
     SIN_F64,
     COS_F32,
     COS_F64,
+    LOG_F32,
+    LOG_F64,
+    EXP_F32,
+    EXP_F64,
+    POW_F32,
+    POW_F64,
 
     // CONVERSION
     FPEXT_F32_F64,
Index: include/llvm/CodeGen/SelectionDAGNodes.h
===================================================================
--- include/llvm/CodeGen/SelectionDAGNodes.h	(revision 39829)
+++ include/llvm/CodeGen/SelectionDAGNodes.h	(working copy)
@@ -369,9 +369,9 @@
     BIT_CONVERT,
     
     // FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI - Perform unary floating point
-    // negation, absolute value, square root, sine and cosine, and powi
-    // operations.
-    FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI,
+    // negation, absolute value, square root, sine and cosine, powi,
+    // exp, log, and pow operations.
+    FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FEXP, FLOG, FPOW,
     
     // LOAD and STORE have token chains as their first operand, then the same
     // operands as an LLVM load/store instruction, then an offset node that
Index: include/llvm/CodeGen/ValueTypes.h
===================================================================
--- include/llvm/CodeGen/ValueTypes.h	(revision 39829)
+++ include/llvm/CodeGen/ValueTypes.h	(working copy)
@@ -63,14 +63,19 @@
 
     LAST_VALUETYPE =  24,   // This always remains at the end of the list.
 
-    // iAny - An integer value of any bit width. This is used for intrinsics
-    // that have overloadings based on integer bit widths. This is only for
-    // tblgen's consumption!
-    iAny           = 254,   
+    // fAny - Any floating-point or vector floating-point value. This is used
+    // for intrinsics that have overloadings based on floating-point types.
+    // This is only for tblgen's consumption!
+    fAny           =  253,   
 
+    // iAny - An integer or vector integer value of any bit width. This is
+    // used for intrinsics that have overloadings based on integer bit widths.
+    // This is only for tblgen's consumption!
+    iAny           =  254,   
+
     // iPTR - An int value the size of the pointer of the current
     // target.  This should only be used internal to tblgen!
-    iPTR           = 255
+    iPTR           =  255
   };
 
   /// MVT::ValueType - This type holds low-level value types. Valid values
Index: include/llvm/CodeGen/ValueTypes.td
===================================================================
--- include/llvm/CodeGen/ValueTypes.td	(revision 39829)
+++ include/llvm/CodeGen/ValueTypes.td	(working copy)
@@ -45,8 +45,11 @@
 def v4f32  : ValueType<128, 22>;   //  4 x f32 vector value
 def v2f64  : ValueType<128, 23>;   //  2 x f64 vector value
 
+// Pseudo valuetype to represent "float of any format"
+def fAny   : ValueType<0  , 253>;
+
 // Pseudo valuetype to represent "integer of any bit width"
-def iAny   : ValueType<0  , 254>;   // integer value of any bit width
+def iAny   : ValueType<0  , 254>;
 
 // Pseudo valuetype mapped to the current pointer size.
 def iPTR   : ValueType<0  , 255>;
Index: docs/LangRef.html
===================================================================
--- docs/LangRef.html	(revision 39829)
+++ docs/LangRef.html	(working copy)
@@ -3704,7 +3704,7 @@
 names of its argument types encoded into its function name, each
 preceded by a period. For example, the <tt>llvm.ctpop</tt> function can take an
 integer of any width. This leads to a family of functions such as 
-<tt>i32 @llvm.ctpop.i8(i8 %val)</tt> and <tt>i32 @llvm.ctpop.i29(i29 %val)</tt>.
+<tt>i8 @llvm.ctpop.i8(i8 %val)</tt> and <tt>i29 @llvm.ctpop.i29(i29 %val)</tt>.
 </p>
 
 
@@ -4540,12 +4540,11 @@
 
 <h5>Syntax:</h5>
 <p>This is an overloaded intrinsic function. You can use bswap on any integer
-type that is an even number of bytes (i.e. BitWidth % 16 == 0). Note the suffix
-that includes the type for the result and the operand.
+type that is an even number of bytes (i.e. BitWidth % 16 == 0).
 <pre>
-  declare i16 @llvm.bswap.i16.i16(i16 <id>)
-  declare i32 @llvm.bswap.i32.i32(i32 <id>)
-  declare i64 @llvm.bswap.i64.i64(i64 <id>)
+  declare i16 @llvm.bswap.i16(i16 <id>)
+  declare i32 @llvm.bswap.i32(i32 <id>)
+  declare i64 @llvm.bswap.i64(i64 <id>)
 </pre>
 
 <h5>Overview:</h5>
@@ -4560,12 +4559,12 @@
 <h5>Semantics:</h5>
 
 <p>
-The <tt>llvm.bswap.16.i16</tt> intrinsic returns an i16 value that has the high 
+The <tt>llvm.bswap.i16</tt> intrinsic returns an i16 value that has the high 
 and low byte of the input i16 swapped.  Similarly, the <tt>llvm.bswap.i32</tt> 
 intrinsic returns an i32 value that has the four bytes of the input i32 
 swapped, so that if the input bytes are numbered 0, 1, 2, 3 then the returned 
-i32 will have its bytes in 3, 2, 1, 0 order.  The <tt>llvm.bswap.i48.i48</tt>, 
-<tt>llvm.bswap.i64.i64</tt> and other intrinsics extend this concept to
+i32 will have its bytes in 3, 2, 1, 0 order.  The <tt>llvm.bswap.i48</tt>, 
+<tt>llvm.bswap.i64</tt> and other intrinsics extend this concept to
 additional even-byte lengths (6 bytes, 8 bytes and more, respectively).
 </p>
 
@@ -4582,11 +4581,11 @@
 <p>This is an overloaded intrinsic. You can use llvm.ctpop on any integer bit
 width. Not all targets support all bit widths however.
 <pre>
-  declare i32 @llvm.ctpop.i8 (i8  <src>)
-  declare i32 @llvm.ctpop.i16(i16 <src>)
+  declare i8 @llvm.ctpop.i8 (i8  <src>)
+  declare i16 @llvm.ctpop.i16(i16 <src>)
   declare i32 @llvm.ctpop.i32(i32 <src>)
-  declare i32 @llvm.ctpop.i64(i64 <src>)
-  declare i32 @llvm.ctpop.i256(i256 <src>)
+  declare i64 @llvm.ctpop.i64(i64 <src>)
+  declare i256 @llvm.ctpop.i256(i256 <src>)
 </pre>
 
 <h5>Overview:</h5>
@@ -4621,11 +4620,11 @@
 <p>This is an overloaded intrinsic. You can use <tt>llvm.ctlz</tt> on any 
 integer bit width. Not all targets support all bit widths however.
 <pre>
-  declare i32 @llvm.ctlz.i8 (i8  <src>)
-  declare i32 @llvm.ctlz.i16(i16 <src>)
+  declare i8 @llvm.ctlz.i8 (i8  <src>)
+  declare i16 @llvm.ctlz.i16(i16 <src>)
   declare i32 @llvm.ctlz.i32(i32 <src>)
-  declare i32 @llvm.ctlz.i64(i64 <src>)
-  declare i32 @llvm.ctlz.i256(i256 <src>)
+  declare i64 @llvm.ctlz.i64(i64 <src>)
+  declare i256 @llvm.ctlz.i256(i256 <src>)
 </pre>
 
 <h5>Overview:</h5>
@@ -4664,11 +4663,11 @@
 <p>This is an overloaded intrinsic. You can use <tt>llvm.cttz</tt> on any 
 integer bit width. Not all targets support all bit widths however.
 <pre>
-  declare i32 @llvm.cttz.i8 (i8  <src>)
-  declare i32 @llvm.cttz.i16(i16 <src>)
+  declare i8 @llvm.cttz.i8 (i8  <src>)
+  declare i16 @llvm.cttz.i16(i16 <src>)
   declare i32 @llvm.cttz.i32(i32 <src>)
-  declare i32 @llvm.cttz.i64(i64 <src>)
-  declare i32 @llvm.cttz.i256(i256 <src>)
+  declare i64 @llvm.cttz.i64(i64 <src>)
+  declare i256 @llvm.cttz.i256(i256 <src>)
 </pre>
 
 <h5>Overview:</h5>
@@ -4705,8 +4704,8 @@
 <p>This is an overloaded intrinsic. You can use <tt>llvm.part.select</tt> 
 on any integer bit width.
 <pre>
-  declare i17 @llvm.part.select.i17.i17 (i17 %val, i32 %loBit, i32 %hiBit)
-  declare i29 @llvm.part.select.i29.i29 (i29 %val, i32 %loBit, i32 %hiBit)
+  declare i17 @llvm.part.select.i17 (i17 %val, i32 %loBit, i32 %hiBit)
+  declare i29 @llvm.part.select.i29 (i29 %val, i32 %loBit, i32 %hiBit)
 </pre>
 
 <h5>Overview:</h5>
@@ -4752,8 +4751,8 @@
 <p>This is an overloaded intrinsic. You can use <tt>llvm.part.set</tt> 
 on any integer bit width.
 <pre>
-  declare i17 @llvm.part.set.i17.i17.i9 (i17 %val, i9 %repl, i32 %lo, i32 %hi)
-  declare i29 @llvm.part.set.i29.i29.i9 (i29 %val, i9 %repl, i32 %lo, i32 %hi)
+  declare i17 @llvm.part.set.i17.i9 (i17 %val, i9 %repl, i32 %lo, i32 %hi)
+  declare i29 @llvm.part.set.i29.i9 (i29 %val, i9 %repl, i32 %lo, i32 %hi)
 </pre>
 
 <h5>Overview:</h5>
Index: utils/TableGen/CodeGenTarget.cpp
===================================================================
--- utils/TableGen/CodeGenTarget.cpp	(revision 39829)
+++ utils/TableGen/CodeGenTarget.cpp	(working copy)
@@ -44,6 +44,7 @@
   case MVT::i64:   return "MVT::i64";
   case MVT::i128:  return "MVT::i128";
   case MVT::iAny:  return "MVT::iAny";
+  case MVT::fAny:  return "MVT::fAny";
   case MVT::f32:   return "MVT::f32";
   case MVT::f64:   return "MVT::f64";
   case MVT::f80:   return "MVT::f80";
@@ -76,6 +77,7 @@
   case MVT::i64:   return "MVT::i64";
   case MVT::i128:  return "MVT::i128";
   case MVT::iAny:  return "MVT::iAny";
+  case MVT::fAny:  return "MVT::fAny";
   case MVT::f32:   return "MVT::f32";
   case MVT::f64:   return "MVT::f64";
   case MVT::f80:   return "MVT::f80";
@@ -93,7 +95,7 @@
   case MVT::v2f32: return "MVT::v2f32";
   case MVT::v4f32: return "MVT::v4f32";
   case MVT::v2f64: return "MVT::v2f64";
-  case MVT::iPTR:  return "TLI.getPointerTy()";
+  case MVT::iPTR:  return "MVT::iPTR";
   default: assert(0 && "ILLEGAL VALUE TYPE!"); return "";
   }
 }
@@ -622,13 +624,12 @@
   for (unsigned i = 0, e = TypeList->getSize(); i != e; ++i) {
     Record *TyEl = TypeList->getElementAsRecord(i);
     assert(TyEl->isSubClassOf("LLVMType") && "Expected a type!");
-    ArgTypes.push_back(TyEl->getValueAsString("TypeVal"));
     MVT::ValueType VT = getValueType(TyEl->getValueAsDef("VT"));
-    isOverloaded |= VT == MVT::iAny;
+    isOverloaded |= VT == MVT::iAny || VT == MVT::fAny;
     ArgVTs.push_back(VT);
     ArgTypeDefs.push_back(TyEl);
   }
-  if (ArgTypes.size() == 0)
+  if (ArgVTs.size() == 0)
     throw "Intrinsic '"+DefName+"' needs at least a type for the ret value!";
 
   
Index: utils/TableGen/CodeGenIntrinsics.h
===================================================================
--- utils/TableGen/CodeGenIntrinsics.h	(revision 39829)
+++ utils/TableGen/CodeGenIntrinsics.h	(working copy)
@@ -30,10 +30,6 @@
     std::string GCCBuiltinName;// Name of the corresponding GCC builtin, or "".
     std::string TargetPrefix;  // Target prefix, e.g. "ppc" for t-s intrinsics.
     
-    /// ArgTypes - The type primitive enum value for the return value and all
-    /// of the arguments.  These are things like Type::IntegerTyID.
-    std::vector<std::string> ArgTypes;
-    
     /// ArgVTs - The MVT::ValueType for each argument type.  Note that this list
     /// is only populated when in the context of a target .td file.  When
     /// building Intrinsics.td, this isn't available, because we don't know the
Index: utils/TableGen/RegisterInfoEmitter.cpp
===================================================================
--- utils/TableGen/RegisterInfoEmitter.cpp	(revision 39829)
+++ utils/TableGen/RegisterInfoEmitter.cpp	(working copy)
@@ -209,7 +209,7 @@
        << "  static const MVT::ValueType " << Name
        << "[] = {\n    ";
     for (unsigned i = 0, e = RC.VTs.size(); i != e; ++i)
-      OS << getName(RC.VTs[i]) << ", ";
+      OS << getEnumName(RC.VTs[i]) << ", ";
     OS << "MVT::Other\n  };\n\n";
   }
   OS << "}  // end anonymous namespace\n\n";
Index: utils/TableGen/IntrinsicEmitter.cpp
===================================================================
--- utils/TableGen/IntrinsicEmitter.cpp	(revision 39829)
+++ utils/TableGen/IntrinsicEmitter.cpp	(working copy)
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "CodeGenTarget.h"
 #include "IntrinsicEmitter.h"
 #include "Record.h"
 #include "llvm/ADT/StringExtras.h"
@@ -94,12 +95,14 @@
     
     // For overloaded intrinsics, only the prefix needs to match
     if (Ints[I->second].isOverloaded)
-      OS << "    if (Len >= " << I->first.size()
-       << " && !memcmp(Name, \"" << I->first << "\", " << I->first.size()
-       << ")) return Intrinsic::" << Ints[I->second].EnumName << ";\n";
+      OS << "    if (Len > " << I->first.size()
+       << " && !memcmp(Name, \"" << I->first << ".\", "
+       << (I->first.size() + 1) << ")) return Intrinsic::"
+       << Ints[I->second].EnumName << ";\n";
     else 
       OS << "    if (Len == " << I->first.size()
-         << " && !memcmp(Name, \"" << I->first << "\", Len)) return Intrinsic::"
+         << " && !memcmp(Name, \"" << I->first << "\", "
+         << I->first.size() << ")) return Intrinsic::"
          << Ints[I->second].EnumName << ";\n";
   }
   OS << "  }\n";
@@ -117,50 +120,47 @@
   OS << "#endif\n\n";
 }
 
-static bool EmitTypeVerify(std::ostream &OS, Record *ArgType) {
-  if (ArgType->getValueAsString("TypeVal") == "...")  return true;
-  
-  OS << "(int)" << ArgType->getValueAsString("TypeVal") << ", ";
-  // If this is an integer type, check the width is correct.
-  if (ArgType->isSubClassOf("LLVMIntegerType"))
-    OS << ArgType->getValueAsInt("Width") << ", ";
-
-  // If this is a vector type, check that the subtype and size are correct.
-  else if (ArgType->isSubClassOf("LLVMVectorType")) {
-    EmitTypeVerify(OS, ArgType->getValueAsDef("ElTy"));
-    OS << ArgType->getValueAsInt("NumElts") << ", ";
-  }
-  
-  return false;
-}
-
 static void EmitTypeGenerate(std::ostream &OS, Record *ArgType, 
                              unsigned &ArgNo) {
-  if (ArgType->isSubClassOf("LLVMIntegerType")) {
-    unsigned BitWidth = ArgType->getValueAsInt("Width");
+  MVT::ValueType VT = getValueType(ArgType->getValueAsDef("VT"));
+
+  if (ArgType->isSubClassOf("LLVMMatchType")) {
+    unsigned Number = ArgType->getValueAsInt("Number");
+    assert(Number < ArgNo && "Invalid matching number!");
+    OS << "Tys[" << Number << "]";
+  } else if (VT == MVT::iAny || VT == MVT::fAny) {
     // NOTE: The ArgNo variable here is not the absolute argument number, it is
     // the index of the "arbitrary" type in the Tys array passed to the
     // Intrinsic::getDeclaration function. Consequently, we only want to
-    // increment it when we actually hit an arbitrary integer type which is
-    // identified by BitWidth == 0. Getting this wrong leads to very subtle
-    // bugs!
-    if (BitWidth == 0)
-      OS << "Tys[" << ArgNo++ << "]";
-    else
-      OS << "IntegerType::get(" << BitWidth << ")";
-  } else if (ArgType->isSubClassOf("LLVMVectorType")) {
-    OS << "VectorType::get(";
-    EmitTypeGenerate(OS, ArgType->getValueAsDef("ElTy"), ArgNo);
-    OS << ", " << ArgType->getValueAsInt("NumElts") << ")";
-  } else if (ArgType->isSubClassOf("LLVMPointerType")) {
+    // increment it when we actually hit an overloaded type. Getting this wrong
+    // leads to very subtle bugs!
+    OS << "Tys[" << ArgNo++ << "]";
+  } else if (MVT::isInteger(VT)) {
+    unsigned BitWidth = MVT::getSizeInBits(VT);
+    OS << "IntegerType::get(" << BitWidth << ")";
+  } else if (MVT::isVector(VT)) {
+    OS << "VectorType::get(MVT::getTypeForValueType("
+       << getEnumName(MVT::getVectorElementType(VT))
+       << "), " << MVT::getVectorNumElements(VT) << ")";
+  } else if (VT == MVT::iPTR) {
     OS << "PointerType::get(";
     EmitTypeGenerate(OS, ArgType->getValueAsDef("ElTy"), ArgNo);
     OS << ")";
-  } else if (ArgType->isSubClassOf("LLVMEmptyStructType")) {
+  } else if (VT == MVT::Other) {
+    // MVT::OtherVT is used to mean the empty struct type here.
     OS << "StructType::get(std::vector<const Type *>())";
+  } else if (VT == MVT::f32) {
+    OS << "Type::FloatTy";
+  } else if (VT == MVT::f64) {
+    OS << "Type::DoubleTy";
+  } else if (VT == MVT::isVoid) {
+    if (ArgNo == 0)
+      OS << "Type::VoidTy";
+    else
+      // MVT::isVoid is used to mean varargs here.
+      OS << "...";
   } else {
-    OS << "Type::getPrimitiveType(";
-    OS << ArgType->getValueAsString("TypeVal") << ")";
+    assert(false && "Unsupported ValueType!");
   }
 }
 
@@ -209,18 +209,24 @@
     }
     
     const std::vector<Record*> &ArgTypes = I->first;
-    OS << "    VerifyIntrinsicPrototype(ID, IF, ";
-    bool VarArg = false;
+    OS << "    VerifyIntrinsicPrototype(ID, IF, " << ArgTypes.size() << ", ";
     for (unsigned j = 0; j != ArgTypes.size(); ++j) {
-      VarArg = EmitTypeVerify(OS, ArgTypes[j]);
-      if (VarArg) {
-        if ((j+1) != ArgTypes.size())
+      Record *ArgType = ArgTypes[j];
+      if (ArgType->isSubClassOf("LLVMMatchType")) {
+        unsigned Number = ArgType->getValueAsInt("Number");
+        assert(Number < j && "Invalid matching number!");
+        OS << "~" << Number;
+      } else {
+        MVT::ValueType VT = getValueType(ArgType->getValueAsDef("VT"));
+        OS << getEnumName(VT);
+        if (VT == MVT::isVoid && j != 0 && j != ArgTypes.size()-1)
           throw "Var arg type not last argument";
-        break;
       }
+      if (j != ArgTypes.size()-1)
+        OS << ", ";
     }
       
-    OS << (VarArg ? "-2);\n" : "-1);\n");
+    OS << ");\n";
     OS << "    break;\n";
   }
   OS << "  }\n";
@@ -255,7 +261,8 @@
     const std::vector<Record*> &ArgTypes = I->first;
     unsigned N = ArgTypes.size();
 
-    if (ArgTypes[N-1]->getValueAsString("TypeVal") == "...") {
+    if (N > 1 &&
+        getValueType(ArgTypes[N-1]->getValueAsDef("VT")) == MVT::isVoid) {
       OS << "    IsVarArg = true;\n";
       --N;
     }
Index: tools/llvm-upgrade/UpgradeParser.y.cvs
===================================================================
--- tools/llvm-upgrade/UpgradeParser.y.cvs	(revision 39829)
+++ tools/llvm-upgrade/UpgradeParser.y.cvs	(working copy)
@@ -1472,6 +1472,7 @@
         return new FCmpInst(FCmpInst::FCMP_UNO, Args[0], Args[1]);
       }
       break;
+#if 0
     case 'b':
       if (Name.length() == 14 && !memcmp(&Name[5], "bswap.i", 7)) {
         const Type* ArgTy = Args[0]->getType();
@@ -1482,6 +1483,7 @@
         return new CallInst(F, Args[0]);
       }
       break;
+#endif
     case 'c':
       if ((Name.length() <= 14 && !memcmp(&Name[5], "ctpop.i", 7)) ||
           (Name.length() <= 13 && !memcmp(&Name[5], "ctlz.i", 6)) ||
@@ -1493,11 +1495,13 @@
           OldF->setName("upgrd.rm." + Name);
 
         Function *NewF = cast<Function>(
-          CurModule.CurrentModule->getOrInsertFunction(Name, Type::Int32Ty, 
+          CurModule.CurrentModule->getOrInsertFunction(Name, ArgTy, 
                                                        ArgTy, (void*)0));
 
-        Instruction *Call = new CallInst(NewF, Args[0], "", CurBB);
-        return CastInst::createIntegerCast(Call, RetTy, false);
+        std::string InstName0(makeNameUnique("ct0"));
+        std::string InstName1(makeNameUnique("ct1"));
+        Instruction *Call = new CallInst(NewF, Args[0], InstName0, CurBB);
+        return CastInst::createIntegerCast(Call, RetTy, false, InstName1);
       }
       break;
 
Index: tools/llvm-upgrade/UpgradeParser.y
===================================================================
--- tools/llvm-upgrade/UpgradeParser.y	(revision 39829)
+++ tools/llvm-upgrade/UpgradeParser.y	(working copy)
@@ -1472,6 +1472,7 @@
         return new FCmpInst(FCmpInst::FCMP_UNO, Args[0], Args[1]);
       }
       break;
+#if 0
     case 'b':
       if (Name.length() == 14 && !memcmp(&Name[5], "bswap.i", 7)) {
         const Type* ArgTy = Args[0]->getType();
@@ -1482,6 +1483,7 @@
         return new CallInst(F, Args[0]);
       }
       break;
+#endif
     case 'c':
       if ((Name.length() <= 14 && !memcmp(&Name[5], "ctpop.i", 7)) ||
           (Name.length() <= 13 && !memcmp(&Name[5], "ctlz.i", 6)) ||
@@ -1493,11 +1495,13 @@
           OldF->setName("upgrd.rm." + Name);
 
         Function *NewF = cast<Function>(
-          CurModule.CurrentModule->getOrInsertFunction(Name, Type::Int32Ty, 
+          CurModule.CurrentModule->getOrInsertFunction(Name, ArgTy, 
                                                        ArgTy, (void*)0));
 
-        Instruction *Call = new CallInst(NewF, Args[0], "", CurBB);
-        return CastInst::createIntegerCast(Call, RetTy, false);
+        std::string InstName0(makeNameUnique("ct0"));
+        std::string InstName1(makeNameUnique("ct1"));
+        Instruction *Call = new CallInst(NewF, Args[0], InstName0, CurBB);
+        return CastInst::createIntegerCast(Call, RetTy, false, InstName1);
       }
       break;
 
Index: lib/Analysis/ConstantFolding.cpp
===================================================================
--- lib/Analysis/ConstantFolding.cpp	(revision 39829)
+++ lib/Analysis/ConstantFolding.cpp	(working copy)
@@ -315,10 +315,13 @@
   const std::string &Name = F->getName();
 
   switch (F->getIntrinsicID()) {
-  case Intrinsic::sqrt_f32:
-  case Intrinsic::sqrt_f64:
-  case Intrinsic::powi_f32:
-  case Intrinsic::powi_f64:
+  case Intrinsic::sqrt:
+  case Intrinsic::powi:
+  case Intrinsic::log:
+  case Intrinsic::exp:
+  case Intrinsic::pow:
+  case Intrinsic::sin:
+  case Intrinsic::cos:
   case Intrinsic::bswap:
   case Intrinsic::ctpop:
   case Intrinsic::ctlz:
@@ -437,13 +440,13 @@
         return ConstantInt::get(Op->getValue().byteSwap());
       } else if (Name.size() > 11 && !memcmp(&Name[0],"llvm.ctpop",10)) {
         uint64_t ctpop = Op->getValue().countPopulation();
-        return ConstantInt::get(Type::Int32Ty, ctpop);
+        return ConstantInt::get(Ty, ctpop);
       } else if (Name.size() > 10 && !memcmp(&Name[0], "llvm.cttz", 9)) {
         uint64_t cttz = Op->getValue().countTrailingZeros();
-        return ConstantInt::get(Type::Int32Ty, cttz);
+        return ConstantInt::get(Ty, cttz);
       } else if (Name.size() > 10 && !memcmp(&Name[0], "llvm.ctlz", 9)) {
         uint64_t ctlz = Op->getValue().countLeadingZeros();
-        return ConstantInt::get(Type::Int32Ty, ctlz);
+        return ConstantInt::get(Ty, ctlz);
       }
     }
   } else if (NumOperands == 2) {
Index: lib/CodeGen/IntrinsicLowering.cpp
===================================================================
--- lib/CodeGen/IntrinsicLowering.cpp	(revision 39829)
+++ lib/CodeGen/IntrinsicLowering.cpp	(working copy)
@@ -98,15 +98,56 @@
                               PointerType::get(Type::Int8Ty), Type::Int32Ty, 
                               TD.getIntPtrType(), (Type *)0);
         break;
-      case Intrinsic::sqrt_f32:
-      case Intrinsic::sqrt_f64:
-        if(I->arg_begin()->getType() == Type::FloatTy)
-          EnsureFunctionExists(M, "sqrtf", I->arg_begin(), I->arg_end(),
-                               Type::FloatTy);
+      case Intrinsic::sqrt:
+        if (I->arg_begin()->getType() == Type::FloatTy)
+          M.getOrInsertFunction("sqrtf", Type::FloatTy, Type::FloatTy,
+                                (Type *)0);
         else
-          EnsureFunctionExists(M, "sqrt", I->arg_begin(), I->arg_end(),
-                               Type::DoubleTy);
+          M.getOrInsertFunction("sqrt", Type::DoubleTy, Type::DoubleTy,
+                                (Type *)0);
         break;
+      case Intrinsic::sin:
+        if (I->arg_begin()->getType() == Type::FloatTy)
+          M.getOrInsertFunction("sinf", Type::FloatTy, Type::FloatTy,
+                                (Type *)0);
+        else
+          M.getOrInsertFunction("sin", Type::DoubleTy, Type::DoubleTy,
+                                (Type *)0);
+        break;
+      case Intrinsic::cos:
+        if (I->arg_begin()->getType() == Type::FloatTy)
+          M.getOrInsertFunction("cosf", Type::FloatTy, Type::FloatTy,
+                                (Type *)0);
+        else
+          M.getOrInsertFunction("cos", Type::DoubleTy, Type::DoubleTy,
+                                (Type *)0);
+        break;
+      case Intrinsic::exp:
+        if (I->arg_begin()->getType() == Type::FloatTy)
+          M.getOrInsertFunction("expf", Type::FloatTy, Type::FloatTy,
+                                (Type *)0);
+        else
+          M.getOrInsertFunction("exp", Type::DoubleTy, Type::DoubleTy,
+                                (Type *)0);
+        break;
+      case Intrinsic::log:
+        if (I->arg_begin()->getType() == Type::FloatTy)
+          M.getOrInsertFunction("logf", Type::FloatTy, Type::FloatTy,
+                                (Type *)0);
+        else
+          M.getOrInsertFunction("log", Type::DoubleTy, Type::DoubleTy,
+                                (Type *)0);
+        break;
+      case Intrinsic::pow:
+        if (I->arg_begin()->getType() == Type::FloatTy)
+          M.getOrInsertFunction("powf", Type::FloatTy,
+                                Type::FloatTy, Type::FloatTy,
+                                (Type *)0);
+        else
+          M.getOrInsertFunction("pow", Type::DoubleTy,
+                                Type::DoubleTy, Type::DoubleTy,
+                                (Type *)0);
+        break;
       }
 }
 
@@ -779,19 +820,18 @@
                     MemsetFCache);
     break;
   }
-  case Intrinsic::sqrt_f32: {
-    static Constant *sqrtfFCache = 0;
-    ReplaceCallWith("sqrtf", CI, CI->op_begin()+1, CI->op_end(),
-                    Type::FloatTy, sqrtfFCache);
+  case Intrinsic::sqrt:
+    if (CI->getType() == Type::FloatTy) {
+      static Constant *sqrtfFCache = 0;
+      ReplaceCallWith("sqrtf", CI, CI->op_begin()+1, CI->op_end(),
+                      Type::FloatTy, sqrtfFCache);
+    } else {
+      static Constant *sqrtFCache = 0;
+      ReplaceCallWith("sqrt", CI, CI->op_begin()+1, CI->op_end(),
+                      Type::DoubleTy, sqrtFCache);
+    }
     break;
   }
-  case Intrinsic::sqrt_f64: {
-    static Constant *sqrtFCache = 0;
-    ReplaceCallWith("sqrt", CI, CI->op_begin()+1, CI->op_end(),
-                    Type::DoubleTy, sqrtFCache);
-    break;
-  }
-  }
 
   assert(CI->use_empty() &&
          "Lowering should have eliminated any uses of the intrinsic call!");
Index: lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp	(revision 39829)
+++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp	(working copy)
@@ -2721,19 +2721,43 @@
     return 0;
   }
 
-  case Intrinsic::sqrt_f32:
-  case Intrinsic::sqrt_f64:
+  case Intrinsic::sqrt:
     setValue(&I, DAG.getNode(ISD::FSQRT,
                              getValue(I.getOperand(1)).getValueType(),
                              getValue(I.getOperand(1))));
     return 0;
-  case Intrinsic::powi_f32:
-  case Intrinsic::powi_f64:
+  case Intrinsic::powi:
     setValue(&I, DAG.getNode(ISD::FPOWI,
                              getValue(I.getOperand(1)).getValueType(),
                              getValue(I.getOperand(1)),
                              getValue(I.getOperand(2))));
     return 0;
+  case Intrinsic::sin:
+    setValue(&I, DAG.getNode(ISD::FSIN,
+                             getValue(I.getOperand(1)).getValueType(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::cos:
+    setValue(&I, DAG.getNode(ISD::FCOS,
+                             getValue(I.getOperand(1)).getValueType(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::exp:
+    setValue(&I, DAG.getNode(ISD::FEXP,
+                             getValue(I.getOperand(1)).getValueType(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::log:
+    setValue(&I, DAG.getNode(ISD::FLOG,
+                             getValue(I.getOperand(1)).getValueType(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::pow:
+    setValue(&I, DAG.getNode(ISD::FPOW,
+                             getValue(I.getOperand(1)).getValueType(),
+                             getValue(I.getOperand(1)),
+                             getValue(I.getOperand(2))));
+    return 0;
   case Intrinsic::pcmarker: {
     SDOperand Tmp = getValue(I.getOperand(1));
     DAG.setRoot(DAG.getNode(ISD::PCMARKER, MVT::Other, getRoot(), Tmp));
@@ -2767,10 +2791,6 @@
     SDOperand Arg = getValue(I.getOperand(1));
     MVT::ValueType Ty = Arg.getValueType();
     SDOperand result = DAG.getNode(ISD::CTTZ, Ty, Arg);
-    if (Ty < MVT::i32)
-      result = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, result);
-    else if (Ty > MVT::i32)
-      result = DAG.getNode(ISD::TRUNCATE, MVT::i32, result);
     setValue(&I, result);
     return 0;
   }
@@ -2778,10 +2798,6 @@
     SDOperand Arg = getValue(I.getOperand(1));
     MVT::ValueType Ty = Arg.getValueType();
     SDOperand result = DAG.getNode(ISD::CTLZ, Ty, Arg);
-    if (Ty < MVT::i32)
-      result = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, result);
-    else if (Ty > MVT::i32)
-      result = DAG.getNode(ISD::TRUNCATE, MVT::i32, result);
     setValue(&I, result);
     return 0;
   }
@@ -2789,10 +2805,6 @@
     SDOperand Arg = getValue(I.getOperand(1));
     MVT::ValueType Ty = Arg.getValueType();
     SDOperand result = DAG.getNode(ISD::CTPOP, Ty, Arg);
-    if (Ty < MVT::i32)
-      result = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, result);
-    else if (Ty > MVT::i32)
-      result = DAG.getNode(ISD::TRUNCATE, MVT::i32, result);
     setValue(&I, result);
     return 0;
   }
Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp	(revision 39829)
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp	(working copy)
@@ -3410,6 +3410,9 @@
   case ISD::FSIN:   return "fsin";
   case ISD::FCOS:   return "fcos";
   case ISD::FPOWI:  return "fpowi";
+  case ISD::FEXP:   return "fexp";
+  case ISD::FLOG:   return "flog";
+  case ISD::FPOW:   return "fpow";
 
   // Binary operators
   case ISD::ADD:    return "add";
Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp	(revision 39829)
+++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp	(working copy)
@@ -146,6 +146,12 @@
   /// result.
   SDOperand LegalizeOp(SDOperand O);
   
+  /// UnrollVector - We know that the given vector has a legal type, however
+  /// the operation it performs is not legal and is an operation that we have
+  /// no way of lowering.  "Unroll" the vector, splitting out the scalars and
+  /// operating on each element individually.
+  SDOperand UnrollVector(SDOperand O);
+
   /// PromoteOp - Given an operation that produces a value in an invalid type,
   /// promote it to compute the value into a larger type.  The produced value
   /// will have the correct bits for the low portion of the register, but no
@@ -551,6 +557,48 @@
 }
 
 
+/// UnrollVector - We know that the given vector has a legal type, however
+/// the operation it performs is not legal and is an operation that we have
+/// no way of lowering.  "Unroll" the vector, splitting out the scalars and
+/// operating on each element individually.  Non-vector operands (such as
+/// the power operand of FPOWI) are passed unchanged to every scalar node.
+///
+/// This routine generates very poor code in the case where there is a
+/// sequence of illegal operations that are lowered into calls.  The first
+/// vector is completely lowered first, and the scalar calls are all emitted
+/// with their chains, and that forces the whole first vector operation to
+/// be complete before any of the elements of the second vector operation
+/// can start.  Prefer lowering with other vector operations or vector
+/// library routines over relying on this routine.
+SDOperand SelectionDAGLegalize::UnrollVector(SDOperand Op) {
+  MVT::ValueType VT = Op.getValueType();
+  assert(isTypeLegal(VT) &&
+         "Caller should expand or promote operands that are not legal!");
+  assert(Op.Val->getNumValues() == 1 &&
+         "Can't unroll a vector with multiple results!");
+  unsigned NE = MVT::getVectorNumElements(VT);
+  MVT::ValueType EltVT = MVT::getVectorElementType(VT);
+
+  SmallVector<SDOperand, 8> Scalars;
+  SmallVector<SDOperand, 4> Operands(Op.getNumOperands());
+  for (unsigned i = 0; i != NE; ++i) {
+    for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+      SDOperand Operand = Op.getOperand(j);
+      MVT::ValueType OperandVT = Operand.getValueType();
+      if (MVT::isVector(OperandVT))
+        Operand = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+                              MVT::getVectorElementType(OperandVT),
+                              Operand, DAG.getConstant(i, MVT::i32));
+      Operands[j] = Operand;  // Scalar operands are reused for every element.
+    }
+    Scalars.push_back(DAG.getNode(Op.getOpcode(), EltVT,
+                                  &Operands[0], Operands.size()));
+  }
+
+  return DAG.getNode(ISD::BUILD_VECTOR, VT, &Scalars[0], Scalars.size());
+}
+
+
 /// LegalizeOp - We know that the specified value has a legal type, and
 /// that its operands are legal.  Now ensure that the operation itself
 /// is legal, recursively ensuring that the operands' operations remain
@@ -2306,6 +2354,7 @@
   case ISD::FSUB:
   case ISD::FMUL:
   case ISD::FDIV:
+  case ISD::FPOW:
     Tmp1 = LegalizeOp(Node->getOperand(0));   // LHS
     switch (getTypeAction(Node->getOperand(1).getValueType())) {
     case Expand: assert(0 && "Not possible");
@@ -2344,18 +2393,7 @@
       assert(MVT::isVector(Node->getValueType(0)) &&
              "Cannot expand this binary operator!");
       // Expand the operation into a bunch of nasty scalar code.
-      SmallVector<SDOperand, 8> Ops;
-      MVT::ValueType EltVT = MVT::getVectorElementType(Node->getValueType(0));
-      MVT::ValueType PtrVT = TLI.getPointerTy();
-      for (unsigned i = 0, e = MVT::getVectorNumElements(Node->getValueType(0));
-           i != e; ++i) {
-        SDOperand Idx = DAG.getConstant(i, PtrVT);
-        SDOperand LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, Tmp1, Idx);
-        SDOperand RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, Tmp2, Idx);
-        Ops.push_back(DAG.getNode(Node->getOpcode(), EltVT, LHS, RHS));
-      }
-      Result = DAG.getNode(ISD::BUILD_VECTOR, Node->getValueType(0), 
-                           &Ops[0], Ops.size());
+      Result = LegalizeOp(UnrollVector(Op));
       break;
     }
     case TargetLowering::Promote: {
@@ -2750,6 +2788,8 @@
   case ISD::FSQRT:
   case ISD::FSIN:
   case ISD::FCOS:
+  case ISD::FEXP:
+  case ISD::FLOG:
     Tmp1 = LegalizeOp(Node->getOperand(0));
     switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
     case TargetLowering::Promote:
@@ -2764,6 +2804,12 @@
       }
       break;
     case TargetLowering::Expand:
+      // Expand unsupported unary vector operators by unrolling them.
+      if (MVT::isVector(Node->getValueType(0))) {
+        Result = LegalizeOp(UnrollVector(Op));
+        break;
+      }
+
       switch (Node->getOpcode()) {
       default: assert(0 && "Unreachable!");
       case ISD::FNEG:
@@ -2780,6 +2826,8 @@
         Result = DAG.getNode(ISD::SELECT, VT, Tmp2, Tmp1, Tmp3);
         break;
       }
+      case ISD::FLOG:
+      case ISD::FEXP:
       case ISD::FSQRT:
       case ISD::FSIN:
       case ISD::FCOS: {
@@ -2795,6 +2843,12 @@
         case ISD::FCOS:
           LC = VT == MVT::f32 ? RTLIB::COS_F32 : RTLIB::COS_F64;
           break;
+        case ISD::FLOG:
+          LC = VT == MVT::f32 ? RTLIB::LOG_F32 : RTLIB::LOG_F64;
+          break;
+        case ISD::FEXP:
+          LC = VT == MVT::f32 ? RTLIB::EXP_F32 : RTLIB::EXP_F64;
+          break;
         default: assert(0 && "Unreachable!");
         }
         SDOperand Dummy;
@@ -2807,6 +2861,13 @@
     }
     break;
   case ISD::FPOWI: {
+    // Expand vector powi by unrolling it.
+    if (!TLI.isOperationLegal(Node->getOpcode(), Node->getValueType(0)) &&
+        MVT::isVector(Node->getValueType(0))) {
+      Result = LegalizeOp(UnrollVector(Op));
+      break;
+    }
+
     // We always lower FPOWI into a libcall.  No target support it yet.
     RTLIB::Libcall LC = Node->getValueType(0) == MVT::f32
       ? RTLIB::POWI_F32 : RTLIB::POWI_F64;
@@ -5551,6 +5612,8 @@
   case ISD::SDIV:
   case ISD::UDIV:
   case ISD::FDIV:
+  case ISD::FPOW:
+  case ISD::FPOWI:
   case ISD::AND:
   case ISD::OR:
   case ISD::XOR: {
@@ -5562,6 +5625,26 @@
     Hi = DAG.getNode(Node->getOpcode(), NewVT, LH, RH);
     break;
   }
+  case ISD::FNEG:
+  case ISD::FABS:
+  case ISD::FSQRT:
+  case ISD::FSIN:
+  case ISD::FCOS:
+  case ISD::FLOG:
+  case ISD::FEXP: {
+    SDOperand L, H;
+    SplitVectorOp(Node->getOperand(0), L, H);
+    
+    Lo = DAG.getNode(Node->getOpcode(), NewVT, L);
+    Hi = DAG.getNode(Node->getOpcode(), NewVT, H);
+    break;
+  }
+  case ISD::CopyFromReg:
+    Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, NewVT, Op,
+                     DAG.getConstant(0, MVT::i32));
+    Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, NewVT, Op,
+                     DAG.getConstant(NewNumElts, MVT::i32));
+    break;
   case ISD::LOAD: {
     LoadSDNode *LD = cast<LoadSDNode>(Node);
     SDOperand Ch = LD->getChain();
@@ -5649,6 +5732,8 @@
   case ISD::SDIV:
   case ISD::UDIV:
   case ISD::FDIV:
+  case ISD::FPOW:
+  case ISD::FPOWI:
   case ISD::SREM:
   case ISD::UREM:
   case ISD::FREM:
@@ -5665,6 +5750,8 @@
   case ISD::FSQRT:
   case ISD::FSIN:
   case ISD::FCOS:
+  case ISD::FLOG:
+  case ISD::FEXP:
     Result = DAG.getNode(Node->getOpcode(),
                          NewVT, 
                          ScalarizeVectorOp(Node->getOperand(0)));
Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/TargetLowering.cpp	(revision 39829)
+++ lib/CodeGen/SelectionDAG/TargetLowering.cpp	(working copy)
@@ -62,6 +62,12 @@
   Names[RTLIB::SIN_F64] = "sin";
   Names[RTLIB::COS_F32] = "cosf";
   Names[RTLIB::COS_F64] = "cos";
+  Names[RTLIB::LOG_F32] = "logf";
+  Names[RTLIB::LOG_F64] = "log";
+  Names[RTLIB::EXP_F32] = "expf";
+  Names[RTLIB::EXP_F64] = "exp";
+  Names[RTLIB::POW_F32] = "powf";
+  Names[RTLIB::POW_F64] = "pow";
   Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
   Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
   Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp	(revision 39829)
+++ lib/Target/X86/X86ISelLowering.cpp	(working copy)
@@ -257,6 +257,16 @@
   else
     setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
 
+  // We don't support pow/log/exp/powi
+  setOperationAction(ISD::FPOW , MVT::f64, Expand);
+  setOperationAction(ISD::FLOG , MVT::f64, Expand);
+  setOperationAction(ISD::FEXP , MVT::f64, Expand);
+  setOperationAction(ISD::FPOWI, MVT::f64, Expand);
+  setOperationAction(ISD::FPOW , MVT::f32, Expand);
+  setOperationAction(ISD::FLOG , MVT::f32, Expand);
+  setOperationAction(ISD::FEXP , MVT::f32, Expand);
+  setOperationAction(ISD::FPOWI, MVT::f32, Expand);
+
   if (X86ScalarSSE) {
     // Set up the FP register classes.
     addRegisterClass(MVT::f32, X86::FR32RegisterClass);
@@ -338,6 +348,14 @@
     setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
     setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
     setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand);
+    setOperationAction(ISD::FPOW, (MVT::ValueType)VT, Expand);
+    setOperationAction(ISD::FLOG, (MVT::ValueType)VT, Expand);
+    setOperationAction(ISD::FEXP, (MVT::ValueType)VT, Expand);
+    setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
+    setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
+    setOperationAction(ISD::CTTZ, (MVT::ValueType)VT, Expand);
+    setOperationAction(ISD::CTLZ, (MVT::ValueType)VT, Expand);
+    setOperationAction(ISD::CTPOP, (MVT::ValueType)VT, Expand);
   }
 
   if (Subtarget->hasMMX()) {
Index: lib/Target/X86/X86TargetAsmInfo.cpp
===================================================================
--- lib/Target/X86/X86TargetAsmInfo.cpp	(revision 39829)
+++ lib/Target/X86/X86TargetAsmInfo.cpp	(working copy)
@@ -215,9 +215,9 @@
     return false;
   
   // Okay, we can do this xform, do so now.
-  const Type *Tys[] = { Ty, Ty };
+  const Type *Tys[] = { Ty };
   Module *M = CI->getParent()->getParent()->getParent();
-  Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 2);
+  Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
   
   Value *Op = CI->getOperand(1);
   Op = new CallInst(Int, Op, CI->getName(), CI);
Index: lib/Target/CBackend/CBackend.cpp
===================================================================
--- lib/Target/CBackend/CBackend.cpp	(revision 39829)
+++ lib/Target/CBackend/CBackend.cpp	(working copy)
@@ -2405,8 +2405,7 @@
           case Intrinsic::longjmp:
           case Intrinsic::prefetch:
           case Intrinsic::dbg_stoppoint:
-          case Intrinsic::powi_f32:
-          case Intrinsic::powi_f64:
+          case Intrinsic::powi:
             // We directly implement these intrinsics
             break;
           default:
@@ -2525,8 +2524,7 @@
         writeOperand(I.getOperand(1));
         Out << ')';
         return;
-      case Intrinsic::powi_f32:
-      case Intrinsic::powi_f64:
+      case Intrinsic::powi:
         Out << "__builtin_powi(";
         writeOperand(I.getOperand(1));
         Out << ", ";
Index: lib/VMCore/Verifier.cpp
===================================================================
--- lib/VMCore/Verifier.cpp	(revision 39829)
+++ lib/VMCore/Verifier.cpp	(working copy)
@@ -53,6 +53,7 @@
 #include "llvm/Intrinsics.h"
 #include "llvm/PassManager.h"
 #include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/InstVisitor.h"
 #include "llvm/Support/Streams.h"
@@ -225,7 +226,8 @@
     void visitUserOp2(Instruction &I) { visitUserOp1(I); }
     void visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI);
 
-    void VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F, ...);
+    void VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F,
+                                  unsigned Count, ...);
 
     void WriteValue(const Value *V) {
       if (!V) return;
@@ -1030,9 +1032,11 @@
 /// VerifyIntrinsicPrototype - TableGen emits calls to this function into
 /// Intrinsics.gen.  This implements a little state machine that verifies the
 /// prototype of intrinsics.
-void Verifier::VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F, ...) {
+void Verifier::VerifyIntrinsicPrototype(Intrinsic::ID ID,
+                                        Function *F,
+                                        unsigned Count, ...) {
   va_list VA;
-  va_start(VA, F);
+  va_start(VA, Count);
   
   const FunctionType *FTy = F->getFunctionType();
   
@@ -1041,97 +1045,104 @@
   // suffix, to be checked at the end.
   std::string Suffix;
 
+  if (FTy->getNumParams() + FTy->isVarArg() != Count - 1) {
+    CheckFailed("Intrinsic prototype has incorrect number of arguments!", F);
+    return;
+  }
+
   // Note that "arg#0" is the return type.
-  for (unsigned ArgNo = 0; 1; ++ArgNo) {
-    int TypeID = va_arg(VA, int);
+  for (unsigned ArgNo = 0; ArgNo < Count; ++ArgNo) {
+    MVT::ValueType VT = va_arg(VA, MVT::ValueType);
 
-    if (TypeID == -2) {
+    if (VT == MVT::isVoid && ArgNo > 0) {
+      if (!FTy->isVarArg())
+        CheckFailed("Intrinsic prototype has no '...'!", F);
       break;
     }
 
-    if (TypeID == -1) {
-      if (ArgNo != FTy->getNumParams()+1)
-        CheckFailed("Intrinsic prototype has too many arguments!", F);
-      break;
-    }
-
-    if (ArgNo == FTy->getNumParams()+1) {
-      CheckFailed("Intrinsic prototype has too few arguments!", F);
-      break;
-    }
-    
     const Type *Ty;
     if (ArgNo == 0)
       Ty = FTy->getReturnType();
     else
       Ty = FTy->getParamType(ArgNo-1);
+
+    unsigned NumElts = 0;
+    const Type *EltTy = Ty;
+    if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+      EltTy = VTy->getElementType();
+      NumElts = VTy->getNumElements();
+    }
     
-    if (TypeID != Ty->getTypeID()) {
-      if (ArgNo == 0)
-        CheckFailed("Intrinsic prototype has incorrect result type!", F);
-      else
-        CheckFailed("Intrinsic parameter #" + utostr(ArgNo-1) + " is wrong!",F);
-      break;
-    }
-
-    if (TypeID == Type::IntegerTyID) {
-      unsigned ExpectedBits = (unsigned) va_arg(VA, int);
-      unsigned GotBits = cast<IntegerType>(Ty)->getBitWidth();
-      if (ExpectedBits == 0) {
-        Suffix += ".i" + utostr(GotBits);
-      } else if (GotBits != ExpectedBits) {
-        std::string bitmsg = " Expected " + utostr(ExpectedBits) + " but got "+
-                             utostr(GotBits) + " bits.";
-        if (ArgNo == 0)
-          CheckFailed("Intrinsic prototype has incorrect integer result width!"
-                      + bitmsg, F);
-        else
-          CheckFailed("Intrinsic parameter #" + utostr(ArgNo-1) + " has "
-                      "incorrect integer width!" + bitmsg, F);
-        break;
+    if ((int)VT < 0) {
+      int Match = ~VT;
+      if (Match == 0) {
+        if (Ty != FTy->getReturnType()) {
+          CheckFailed("Intrinsic parameter #" + utostr(ArgNo-1) + " does not "
+                      "match return type.", F);
+          break;
+        }
+      } else {
+        if (Ty != FTy->getParamType(Match-1)) {
+          CheckFailed("Intrinsic parameter #" + utostr(ArgNo-1) + " does not "
+                      "match parameter %" + utostr(Match-1) + ".", F);
+          break;
+        }
       }
+    } else if (VT == MVT::iAny) {
+      unsigned GotBits = cast<IntegerType>(EltTy)->getBitWidth();
+      Suffix += ".";
+      if (EltTy != Ty)
+        Suffix += "v" + utostr(NumElts);
+      Suffix += "i" + utostr(GotBits);;
       // Check some constraints on various intrinsics.
       switch (ID) {
         default: break; // Not everything needs to be checked.
         case Intrinsic::bswap:
           if (GotBits < 16 || GotBits % 16 != 0)
             CheckFailed("Intrinsic requires even byte width argument", F);
-          /* FALL THROUGH */
-        case Intrinsic::part_set:
-        case Intrinsic::part_select:
-          if (ArgNo == 1) {
-            unsigned ResultBits = 
-              cast<IntegerType>(FTy->getReturnType())->getBitWidth();
-            if (GotBits != ResultBits)
-              CheckFailed("Intrinsic requires the bit widths of the first "
-                          "parameter and the result to match", F);
-          }
           break;
       }
-    } else if (TypeID == Type::VectorTyID) {
-      // If this is a packed argument, verify the number and type of elements.
-      const VectorType *PTy = cast<VectorType>(Ty);
-      int ElemTy = va_arg(VA, int);
-      if (ElemTy != PTy->getElementType()->getTypeID()) {
+    } else if (VT == MVT::fAny) {
+      Suffix += ".";
+      if (VT == MVT::fAny && EltTy != Ty)
+        Suffix += "v" + utostr(NumElts);
+      Suffix += MVT::getValueTypeString(MVT::getValueType(EltTy));
+      if (!EltTy->isFloatingPoint()) {
+        CheckFailed("Intrinsic parameter #" + utostr(ArgNo-1) + " is not " +
+                    "a floating-point type.", F);
+        break;
+      }
+    } else if (VT == MVT::iPTR) {
+      if (!isa<PointerType>(Ty)) {
+        CheckFailed("Intrinsic parameter #" + utostr(ArgNo-1) + " is not a "
+                    "pointer and a pointer is required.", F);
+        break;
+      }
+    } else if (MVT::isVector(VT)) {
+      // If this is a vector argument, verify the number and type of elements.
+      if (MVT::getVectorElementType(VT) != MVT::getValueType(EltTy)) {
         CheckFailed("Intrinsic prototype has incorrect vector element type!",
                     F);
         break;
       }
-      if (ElemTy == Type::IntegerTyID) {
-        unsigned NumBits = (unsigned)va_arg(VA, int);
-        unsigned ExpectedBits = 
-          cast<IntegerType>(PTy->getElementType())->getBitWidth();
-        if (NumBits != ExpectedBits) {
-          CheckFailed("Intrinsic prototype has incorrect vector element type!",
-                      F);
-          break;
-        }
-      }
-      if ((unsigned)va_arg(VA, int) != PTy->getNumElements()) {
+      if (MVT::getVectorNumElements(VT) != NumElts) {
         CheckFailed("Intrinsic prototype has incorrect number of "
                     "vector elements!",F);
-          break;
+        break;
       }
+    } else if (MVT::getTypeForValueType(VT) != EltTy) {
+      if (ArgNo == 0)
+        CheckFailed("Intrinsic prototype has incorrect result type!", F);
+      else
+        CheckFailed("Intrinsic parameter #" + utostr(ArgNo-1) + " is wrong!",F);
+      break;
+    } else if (EltTy != Ty) {
+      if (ArgNo == 0)
+        CheckFailed("Intrinsic result type is vector "
+                    "and a scalar is required.", F);
+      else
+        CheckFailed("Intrinsic parameter #" + utostr(ArgNo-1) + " is vector "
+                    "and a scalar is required.", F);
     }
   }
 
Index: lib/VMCore/Function.cpp
===================================================================
--- lib/VMCore/Function.cpp	(revision 39829)
+++ lib/VMCore/Function.cpp	(working copy)
@@ -15,6 +15,7 @@
 #include "llvm/DerivedTypes.h"
 #include "llvm/ParameterAttributes.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/Support/LeakDetector.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "SymbolTableListTraitsImpl.h"
Index: lib/Transforms/Scalar/InstructionCombining.cpp
===================================================================
--- lib/Transforms/Scalar/InstructionCombining.cpp	(revision 39829)
+++ lib/Transforms/Scalar/InstructionCombining.cpp	(working copy)
@@ -3706,9 +3706,9 @@
   for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
     if (ByteValues[i] != V)
       return 0;
-  const Type *Tys[] = { ITy, ITy };
+  const Type *Tys[] = { ITy };
   Module *M = I.getParent()->getParent()->getParent();
-  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 2);
+  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
   return new CallInst(F, V);
 }