[llvm] r278106 - [X86][SSE] Fix memory folding of (v)roundsd / (v)roundss

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 9 02:32:34 PDT 2016


Author: rksimon
Date: Tue Aug  9 04:32:34 2016
New Revision: 278106

URL: http://llvm.org/viewvc/llvm-project?rev=278106&view=rev
Log:
[X86][SSE] Fix memory folding of (v)roundsd / (v)roundss

We only had partial memory folding support for the intrinsic definitions, and (as noted on PR27481) was causing FR32/FR64/VR128 mismatch errors with the machine verifier.

This patch adds missing memory folding support for both intrinsics and the ffloor/fnearbyint/fceil/frint/ftrunc patterns and in doing so fixes the failing machine verifier stack folding tests from PR27481.

Differential Revision: https://reviews.llvm.org/D23276

Modified:
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll
    llvm/trunk/test/CodeGen/X86/stack-folding-fp-sse42.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=278106&r1=278105&r2=278106&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Tue Aug  9 04:32:34 2016
@@ -1116,6 +1116,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::PXORrr,          X86::PXORrm,        TB_ALIGN_16 },
     { X86::ROUNDSDr,        X86::ROUNDSDm,      0 },
     { X86::ROUNDSSr,        X86::ROUNDSSm,      0 },
+    { X86::ROUNDSDr_Int,    X86::ROUNDSDm_Int,  0 },
+    { X86::ROUNDSSr_Int,    X86::ROUNDSSm_Int,  0 },
     { X86::SBB32rr,         X86::SBB32rm,       0 },
     { X86::SBB64rr,         X86::SBB64rm,       0 },
     { X86::SHUFPDrri,       X86::SHUFPDrmi,     TB_ALIGN_16 },
@@ -1412,6 +1414,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPXORrr,           X86::VPXORrm,            0 },
     { X86::VROUNDSDr,         X86::VROUNDSDm,          0 },
     { X86::VROUNDSSr,         X86::VROUNDSSm,          0 },
+    { X86::VROUNDSDr_Int,     X86::VROUNDSDm_Int,      0 },
+    { X86::VROUNDSSr_Int,     X86::VROUNDSSm_Int,      0 },
     { X86::VSHUFPDrri,        X86::VSHUFPDrmi,         0 },
     { X86::VSHUFPSrri,        X86::VSHUFPSrmi,         0 },
     { X86::VSUBPDrr,          X86::VSUBPDrm,           0 },
@@ -6208,9 +6212,11 @@ static bool hasPartialRegUpdate(unsigned
   case X86::ROUNDSDr:
   case X86::ROUNDSDm:
   case X86::ROUNDSDr_Int:
+  case X86::ROUNDSDm_Int:
   case X86::ROUNDSSr:
   case X86::ROUNDSSm:
   case X86::ROUNDSSr_Int:
+  case X86::ROUNDSSm_Int:
   case X86::RSQRTSSr:
   case X86::RSQRTSSm:
   case X86::RSQRTSSr_Int:
@@ -6289,9 +6295,11 @@ static bool hasUndefRegUpdate(unsigned O
   case X86::VROUNDSDr:
   case X86::VROUNDSDm:
   case X86::VROUNDSDr_Int:
+  case X86::VROUNDSDm_Int:
   case X86::VROUNDSSr:
   case X86::VROUNDSSm:
   case X86::VROUNDSSr_Int:
+  case X86::VROUNDSSm_Int:
   case X86::VRSQRTSSr:
   case X86::VRSQRTSSr_Int:
   case X86::VRSQRTSSm:

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=278106&r1=278105&r2=278106&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Aug  9 04:32:34 2016
@@ -6409,6 +6409,17 @@ let ExeDomain = GenericDomain in {
               "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
       []>, Sched<[WriteFAdd]>;
 
+  // Operation, mem.
+  let mayLoad = 1 in
+  def SSm : SS4AIi8<opcss, MRMSrcMem,
+        (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, i32u8imm:$src3),
+        !if(Is2Addr,
+            !strconcat(OpcodeStr,
+                "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+            !strconcat(OpcodeStr,
+                "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+        []>, Sched<[WriteFAddLd, ReadAfterLd]>;
+
   // Intrinsic operation, reg.
   let isCodeGenOnly = 1 in
   def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
@@ -6422,7 +6433,8 @@ let ExeDomain = GenericDomain in {
         Sched<[WriteFAdd]>;
 
   // Intrinsic operation, mem.
-  def SSm : SS4AIi8<opcss, MRMSrcMem,
+  let isCodeGenOnly = 1 in
+  def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
         (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32u8imm:$src3),
         !if(Is2Addr,
             !strconcat(OpcodeStr,
@@ -6444,6 +6456,17 @@ let ExeDomain = GenericDomain in {
                 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
         []>, Sched<[WriteFAdd]>;
 
+  // Operation, mem.
+  let mayLoad = 1 in
+  def SDm : SS4AIi8<opcsd, MRMSrcMem,
+        (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, i32u8imm:$src3),
+        !if(Is2Addr,
+            !strconcat(OpcodeStr,
+                "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+            !strconcat(OpcodeStr,
+                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+        []>, Sched<[WriteFAddLd, ReadAfterLd]>;
+
   // Intrinsic operation, reg.
   let isCodeGenOnly = 1 in
   def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
@@ -6457,7 +6480,8 @@ let ExeDomain = GenericDomain in {
         Sched<[WriteFAdd]>;
 
   // Intrinsic operation, mem.
-  def SDm : SS4AIi8<opcsd, MRMSrcMem,
+  let isCodeGenOnly = 1 in
+  def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
         (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32u8imm:$src3),
         !if(Is2Addr,
             !strconcat(OpcodeStr,

Modified: llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll?rev=278106&r1=278105&r2=278106&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll Tue Aug  9 04:32:34 2016
@@ -1,4 +1,4 @@
-; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c < %s | FileCheck %s
+; RUN: llc -O3 -verify-machineinstrs -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c < %s | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-unknown"
@@ -1559,7 +1559,13 @@ define double @stack_fold_roundsd(double
 }
 declare double @llvm.floor.f64(double) nounwind readnone
 
-; TODO stack_fold_roundsd_int
+define <2 x double> @stack_fold_roundsd_int(<2 x double> %a0, <2 x double> %a1) optsize {
+  ;CHECK-LABEL: stack_fold_roundsd_int
+  ;CHECK:       vroundsd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %2 = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7)
+  ret <2 x double> %2
+}
 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
 
 define float @stack_fold_roundss(float %a0) optsize {
@@ -1571,7 +1577,13 @@ define float @stack_fold_roundss(float %
 }
 declare float @llvm.floor.f32(float) nounwind readnone
 
-; TODO stack_fold_roundss_int
+define <4 x float> @stack_fold_roundss_int(<4 x float> %a0, <4 x float> %a1) optsize {
+  ;CHECK-LABEL: stack_fold_roundss_int
+  ;CHECK:       vroundss $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %2 = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7)
+  ret <4 x float> %2
+}
 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
 
 ; TODO stack_fold_rsqrtps

Modified: llvm/trunk/test/CodeGen/X86/stack-folding-fp-sse42.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-fp-sse42.ll?rev=278106&r1=278105&r2=278106&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-fp-sse42.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-fp-sse42.ll Tue Aug  9 04:32:34 2016
@@ -1,4 +1,4 @@
-; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 < %s | FileCheck %s
+; RUN: llc -O3 -verify-machineinstrs -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 < %s | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-unknown"
@@ -987,7 +987,13 @@ define double @stack_fold_roundsd(double
 }
 declare double @llvm.floor.f64(double) nounwind readnone
 
-; TODO stack_fold_roundsd_int
+define <2 x double> @stack_fold_roundsd_int(<2 x double> %a0, <2 x double> %a1) optsize {
+  ;CHECK-LABEL: stack_fold_roundsd_int
+  ;CHECK:       roundsd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %2 = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7)
+  ret <2 x double> %2
+}
 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
 
 define float @stack_fold_roundss(float %a0) minsize {
@@ -999,7 +1005,13 @@ define float @stack_fold_roundss(float %
 }
 declare float @llvm.floor.f32(float) nounwind readnone
 
-; TODO stack_fold_roundss_int
+define <4 x float> @stack_fold_roundss_int(<4 x float> %a0, <4 x float> %a1) optsize {
+  ;CHECK-LABEL: stack_fold_roundss_int
+  ;CHECK:       roundss $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %2 = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7)
+  ret <4 x float> %2
+}
 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
 
 ; TODO stack_fold_rsqrtps




More information about the llvm-commits mailing list