[PATCH] Model sqrtsd as a binary operation with one source operand tied to the destination (PR14221)

Thu Jan 8 14:16:18 PST 2015

Hi delena, hliao, nadav,

This patch attempts to fix the following miscompile:

  define void @sqrtsd(<2 x double> %a) nounwind uwtable ssp {
    %0 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a) nounwind 
    %a0 = extractelement <2 x double> %0, i32 0
    %conv = fptrunc double %a0 to float
    %a1 = extractelement <2 x double> %0, i32 1
    %conv3 = fptrunc double %a1 to float
    tail call void @callee2(float %conv, float %conv3) nounwind
    ret void
  }

Current codegen:
  sqrtsd	%xmm0, %xmm1        ## high element of %xmm1 is undef here
  xorps	%xmm0, %xmm0
  cvtsd2ss	%xmm1, %xmm0
  shufpd	$1, %xmm1, %xmm1
  cvtsd2ss	%xmm1, %xmm1 ## operating on undef value
  jmp	_callee 


This is a continuation of http://llvm.org/viewvc/llvm-project?view=revision&revision=224624 ( http://reviews.llvm.org/D6330 ) which was itself a continuation of r167064 ( http://llvm.org/viewvc/llvm-project?view=revision&revision=167064 ). 

All of these patches are partial fixes for PR14221 ( http://llvm.org/bugs/show_bug.cgi?id=14221 ); this should be the final patch needed to resolve that bug.

http://reviews.llvm.org/D6885

Files:
  lib/Target/X86/X86InstrSSE.td
  test/CodeGen/X86/sse_partial_update.ll

Index: lib/Target/X86/X86InstrSSE.td
===================================================================

--- lib/Target/X86/X86InstrSSE.td
+++ lib/Target/X86/X86InstrSSE.td
@@ -3664,8 +3664,10 @@
 }
 
 /// sse2_fp_unop_s - SSE2 unops in scalar form.
+// FIXME: Combine the following sse2 classes with the sse1 classes above.
+// The only usage of these is for SQRT[S/P]D. See sse12_fp_binop* for example.
 multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
-                          SDNode OpNode, Intrinsic F64Int, OpndItins itins> {
+                          SDNode OpNode, OpndItins itins> {
 let Predicates = [HasAVX], hasSideEffects = 0 in {
   def V#NAME#SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst),
                       (ins FR64:$src1, FR64:$src2),
@@ -3698,16 +3700,18 @@
                 !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                 [(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD,
             Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>;
-let isCodeGenOnly = 1 in {
-  def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                    !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
-                    [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>,
-                Sched<[itins.Sched]>;
-  def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
-                    !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
-                    [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>,
-                Sched<[itins.Sched.Folded]>;
-}
+  let isCodeGenOnly = 1, Constraints = "$src1 = $dst" in {
+  def SDr_Int :
+    SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+    !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+    [], itins.rr>, Sched<[itins.Sched]>;
+  
+  let mayLoad = 1, hasSideEffects = 0 in
+  def SDm_Int :
+    SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
+    !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+    [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
+  } // isCodeGenOnly, Constraints
 }
 
 /// sse2_fp_unop_p - SSE2 unops in vector forms.
@@ -3749,8 +3753,7 @@
 // Square root.
 defm SQRT  : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS>,
              sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS>,
-             sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd,
-                            SSE_SQRTSD>,
+             sse2_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSD>,
              sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>;
 
 // Reciprocal approximations. Note that these typically require refinement
@@ -3829,6 +3832,8 @@
             (RCPSSr_Int VR128:$src, VR128:$src)>;
   def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
             (SQRTSSr_Int VR128:$src, VR128:$src)>;
+  def : Pat<(int_x86_sse2_sqrt_sd VR128:$src),
+            (SQRTSDr_Int VR128:$src, VR128:$src)>;
 }
 
 // There is no f64 version of the reciprocal approximation instructions.
Index: test/CodeGen/X86/sse_partial_update.ll
===================================================================
--- test/CodeGen/X86/sse_partial_update.ll
+++ test/CodeGen/X86/sse_partial_update.ll
@@ -67,3 +67,26 @@
   ret void
 }
 declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
+
+define void @sqrtsd(<2 x double> %a) nounwind uwtable ssp {
+entry:
+; CHECK-LABEL: sqrtsd:
+; CHECK: sqrtsd %xmm0, %xmm0
+; CHECK-NEXT: cvtsd2ss %xmm0
+; CHECK-NEXT: shufpd
+; CHECK-NEXT: cvtsd2ss %xmm0
+; CHECK-NEXT: movap
+; CHECK-NEXT: jmp
+
+ %0 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a) nounwind
+ %a0 = extractelement <2 x double> %0, i32 0
+ %conv = fptrunc double %a0 to float
+ %a1 = extractelement <2 x double> %0, i32 1
+ %conv3 = fptrunc double %a1 to float
+ tail call void @callee2(float %conv, float %conv3) nounwind
+ ret void
+}
+
+declare void @callee2(float, float)
+declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
+

EMAIL PREFERENCES
  http://reviews.llvm.org/settings/panel/emailpreferences/
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D6885.17911.patch
Type: text/x-patch
Size: 4022 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150108/bbb5e978/attachment.bin>