[llvm] r241671 - [X86][SSE] Added (V)ROUNDSD + (V)ROUNDSS stack folding support

Simon Pilgrim llvm-dev at redking.me.uk
Wed Jul 8 01:07:58 PDT 2015


Author: rksimon
Date: Wed Jul  8 03:07:57 2015
New Revision: 241671

URL: http://llvm.org/viewvc/llvm-project?rev=241671&view=rev
Log:
[X86][SSE] Added (V)ROUNDSD + (V)ROUNDSS stack folding support

Modified:
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll
    llvm/trunk/test/CodeGen/X86/stack-folding-fp-sse42.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=241671&r1=241670&r2=241671&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Wed Jul  8 03:07:57 2015
@@ -1107,6 +1107,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::PUNPCKLQDQrr,    X86::PUNPCKLQDQrm,  TB_ALIGN_16 },
     { X86::PUNPCKLWDrr,     X86::PUNPCKLWDrm,   TB_ALIGN_16 },
     { X86::PXORrr,          X86::PXORrm,        TB_ALIGN_16 },
+    { X86::ROUNDSDr,        X86::ROUNDSDm,      0 },
+    { X86::ROUNDSSr,        X86::ROUNDSSm,      0 },
     { X86::SBB32rr,         X86::SBB32rm,       0 },
     { X86::SBB64rr,         X86::SBB64rm,       0 },
     { X86::SHUFPDrri,       X86::SHUFPDrmi,     TB_ALIGN_16 },
@@ -1403,6 +1405,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPUNPCKLQDQrr,     X86::VPUNPCKLQDQrm,      0 },
     { X86::VPUNPCKLWDrr,      X86::VPUNPCKLWDrm,       0 },
     { X86::VPXORrr,           X86::VPXORrm,            0 },
+    { X86::VROUNDSDr,         X86::VROUNDSDm,          0 },
+    { X86::VROUNDSSr,         X86::VROUNDSSm,          0 },
     { X86::VSHUFPDrri,        X86::VSHUFPDrmi,         0 },
     { X86::VSHUFPSrri,        X86::VSHUFPSrmi,         0 },
     { X86::VSUBPDrr,          X86::VSUBPDrm,           0 },
@@ -6395,7 +6399,7 @@ static bool hasReassocSibling(const Mach
       hasVirtualRegDefsInBasicBlock(*MI1, MBB) &&
       MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()))
     return true;
-  
+
   return false;
 }
 
@@ -6500,7 +6504,7 @@ static void reassociateOps(MachineInstr
   MachineOperand &OpX = Prev.getOperand(OpIdx[Pattern][2]);
   MachineOperand &OpY = Root.getOperand(OpIdx[Pattern][3]);
   MachineOperand &OpC = Root.getOperand(0);
-  
+
   unsigned RegA = OpA.getReg();
   unsigned RegB = OpB.getReg();
   unsigned RegX = OpX.getReg();
@@ -6535,7 +6539,7 @@ static void reassociateOps(MachineInstr
       .addReg(RegX, getKillRegState(KillX))
       .addReg(RegY, getKillRegState(KillY));
   InsInstrs.push_back(MIB1);
-  
+
   MachineInstrBuilder MIB2 =
     BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC)
       .addReg(RegA, getKillRegState(KillA))
@@ -6567,7 +6571,7 @@ void X86InstrInfo::genAlternativeCodeSeq
       Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
   }
   assert(Prev && "Unknown pattern for machine combiner");
-  
+
   reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
   return;
 }

Modified: llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll?rev=241671&r1=241670&r2=241671&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll Wed Jul  8 03:07:57 2015
@@ -1409,12 +1409,26 @@ define <8 x float> @stack_fold_roundps_y
 }
 declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
 
-; TODO stack_fold_roundsd
+define double @stack_fold_roundsd(double %a0) optsize {
+  ;CHECK-LABEL: stack_fold_roundsd
+  ;CHECK:       vroundsd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %2 = call double @llvm.floor.f64(double %a0)
+  ret double %2
+}
+declare double @llvm.floor.f64(double) nounwind readnone
 
 ; TODO stack_fold_roundsd_int
 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
 
-; TODO stack_fold_roundss
+define float @stack_fold_roundss(float %a0) optsize {
+  ;CHECK-LABEL: stack_fold_roundss
+  ;CHECK:       vroundss $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %2 = call float @llvm.floor.f32(float %a0)
+  ret float %2
+}
+declare float @llvm.floor.f32(float) nounwind readnone
 
 ; TODO stack_fold_roundss_int
 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone

Modified: llvm/trunk/test/CodeGen/X86/stack-folding-fp-sse42.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-fp-sse42.ll?rev=241671&r1=241670&r2=241671&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-fp-sse42.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-fp-sse42.ll Wed Jul  8 03:07:57 2015
@@ -884,11 +884,29 @@ define <4 x float> @stack_fold_roundps(<
 }
 declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
 
-; TODO stack_fold_roundsd
+define double @stack_fold_roundsd(double %a0) optsize {
+  ;CHECK-LABEL: stack_fold_roundsd
+  ;CHECK:       roundsd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %2 = call double @llvm.floor.f64(double %a0)
+  ret double %2
+}
+declare double @llvm.floor.f64(double) nounwind readnone
+
 ; TODO stack_fold_roundsd_int
+declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
+
+define float @stack_fold_roundss(float %a0) optsize {
+  ;CHECK-LABEL: stack_fold_roundss
+  ;CHECK:       roundss $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %2 = call float @llvm.floor.f32(float %a0)
+  ret float %2
+}
+declare float @llvm.floor.f32(float) nounwind readnone
 
-; TODO stack_fold_roundss
 ; TODO stack_fold_roundss_int
+declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
 
 ; TODO stack_fold_rsqrtps
 





More information about the llvm-commits mailing list