[llvm-commits] [llvm] r57370 - in /llvm/trunk: include/llvm/IntrinsicsX86.td lib/Target/X86/X86InstrSSE.td

Dale Johannesen dalej at apple.com
Fri Oct 10 16:51:05 PDT 2008


Author: johannes
Date: Fri Oct 10 18:51:03 2008
New Revision: 57370

URL: http://llvm.org/viewvc/llvm-project?rev=57370&view=rev
Log:
Fix SSE4.1 roundss, roundsd.  While the instructions have 
the same pattern as roundpd/roundps, the Intel compiler 
builtins do not:  rounds* has an extra operand.  Fixes
gcc.target/i386/sse4_1-rounds[sd]-[1234].c


Modified:
    llvm/trunk/include/llvm/IntrinsicsX86.td
    llvm/trunk/lib/Target/X86/X86InstrSSE.td

Modified: llvm/trunk/include/llvm/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IntrinsicsX86.td?rev=57370&r1=57369&r2=57370&view=diff

==============================================================================
--- llvm/trunk/include/llvm/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IntrinsicsX86.td Fri Oct 10 18:51:03 2008
@@ -685,13 +685,13 @@
 // FP rounding ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse41_round_ss        : GCCBuiltin<"__builtin_ia32_roundss">,
-              Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty,
+              Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
                          llvm_i32_ty], [IntrNoMem]>;
   def int_x86_sse41_round_ps        : GCCBuiltin<"__builtin_ia32_roundps">,
               Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty,
                          llvm_i32_ty], [IntrNoMem]>;
   def int_x86_sse41_round_sd        : GCCBuiltin<"__builtin_ia32_roundsd">,
-              Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
+              Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
                          llvm_i32_ty], [IntrNoMem]>;
   def int_x86_sse41_round_pd        : GCCBuiltin<"__builtin_ia32_roundpd">,
               Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=57370&r1=57369&r2=57370&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Fri Oct 10 18:51:03 2008
@@ -3169,29 +3169,11 @@
 // SSE4.1 Instructions
 //===----------------------------------------------------------------------===//
 
-multiclass sse41_fp_unop_rm<bits<8> opcss, bits<8> opcps, 
-                            bits<8> opcsd, bits<8> opcpd, 
+multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
                             string OpcodeStr,
-                            Intrinsic F32Int,
                             Intrinsic V4F32Int,
-                            Intrinsic F64Int,
                             Intrinsic V2F64Int> {
   // Intrinsic operation, reg.
-  def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
-                    (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
-                    !strconcat(OpcodeStr,
-                    "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                    [(set VR128:$dst, (F32Int VR128:$src1, imm:$src2))]>,
-                    OpSize;
-
-  // Intrinsic operation, mem.
-  def SSm_Int : SS4AIi8<opcss, MRMSrcMem, 
-                    (outs VR128:$dst), (ins ssmem:$src1, i32i8imm:$src2),
-                    !strconcat(OpcodeStr, 
-                    "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                    [(set VR128:$dst, (F32Int sse_load_f32:$src1, imm:$src2))]>,
-                    OpSize;
-
   // Vector intrinsic operation, reg
   def PSr_Int : SS4AIi8<opcps, MRMSrcReg, 
                     (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
@@ -3209,22 +3191,6 @@
                           (V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>,
                     OpSize;
 
-  // Intrinsic operation, reg.
-  def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
-                    (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
-                    !strconcat(OpcodeStr,
-                    "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                    [(set VR128:$dst, (F64Int VR128:$src1, imm:$src2))]>,
-                    OpSize;
-
-  // Intrinsic operation, mem.
-  def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
-                    (outs VR128:$dst), (ins sdmem:$src1, i32i8imm:$src2),
-                    !strconcat(OpcodeStr,
-                    "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                    [(set VR128:$dst, (F64Int sse_load_f64:$src1, imm:$src2))]>,
-                    OpSize;
-
   // Vector intrinsic operation, reg
   def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
@@ -3243,10 +3209,58 @@
                     OpSize;
 }
 
+let Constraints = "$src1 = $dst" in {
+multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
+                            string OpcodeStr,
+                            Intrinsic F32Int,
+                            Intrinsic F64Int> {
+  // Intrinsic operation, reg.
+  def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
+                    (outs VR128:$dst), 
+                                 (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+                    !strconcat(OpcodeStr,
+                    "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+                    [(set VR128:$dst, 
+                            (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+                    OpSize;
+
+  // Intrinsic operation, mem.
+  def SSm_Int : SS4AIi8<opcss, MRMSrcMem, 
+                    (outs VR128:$dst), 
+                                (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
+                    !strconcat(OpcodeStr, 
+                    "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+                    [(set VR128:$dst, 
+                         (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
+                    OpSize;
+
+  // Intrinsic operation, reg.
+  def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
+                    (outs VR128:$dst), 
+                            (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+                    !strconcat(OpcodeStr,
+                    "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+                    [(set VR128:$dst, 
+                            (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+                    OpSize;
+
+  // Intrinsic operation, mem.
+  def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
+                    (outs VR128:$dst), 
+                            (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
+                    !strconcat(OpcodeStr,
+                    "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+                    [(set VR128:$dst, 
+                        (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
+                    OpSize;
+}
+}
+
 // FP round - roundss, roundps, roundsd, roundpd
-defm ROUND  : sse41_fp_unop_rm<0x0A, 0x08, 0x0B, 0x09, "round",
-                               int_x86_sse41_round_ss, int_x86_sse41_round_ps,
-                               int_x86_sse41_round_sd, int_x86_sse41_round_pd>;
+defm ROUND  : sse41_fp_unop_rm<0x08, 0x09, "round",
+                               int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
+defm ROUND  : sse41_fp_binop_rm<0x0A, 0x0B, "round",
+                               int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
 
 // SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
 multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,





More information about the llvm-commits mailing list