[llvm] r334648 - [X86] Move RCPSSr_Int, RSQRTSSr_Int, SQRTSDr_Int, SQRTSSr_Int to the correct load folding table.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 13 13:03:42 PDT 2018


Author: ctopper
Date: Wed Jun 13 13:03:42 2018
New Revision: 334648

URL: http://llvm.org/viewvc/llvm-project?rev=334648&view=rev
Log:
[X86] Move RCPSSr_Int, RSQRTSSr_Int, SQRTSDr_Int, SQRTSSr_Int to the correct load folding table.

These instructions were in the operand 1 folding table, but their foldable operand is operand 2.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/test/CodeGen/X86/stack-folding-fp-sse42.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=334648&r1=334647&r2=334648&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Wed Jun 13 13:03:42 2018
@@ -654,20 +654,16 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::PTESTrr,         X86::PTESTrm,             TB_ALIGN_16 },
     { X86::RCPPSr,          X86::RCPPSm,              TB_ALIGN_16 },
     { X86::RCPSSr,          X86::RCPSSm,              0 },
-    { X86::RCPSSr_Int,      X86::RCPSSm_Int,          TB_NO_REVERSE },
     { X86::ROUNDPDr,        X86::ROUNDPDm,            TB_ALIGN_16 },
     { X86::ROUNDPSr,        X86::ROUNDPSm,            TB_ALIGN_16 },
     { X86::ROUNDSDr,        X86::ROUNDSDm,            0 },
     { X86::ROUNDSSr,        X86::ROUNDSSm,            0 },
     { X86::RSQRTPSr,        X86::RSQRTPSm,            TB_ALIGN_16 },
     { X86::RSQRTSSr,        X86::RSQRTSSm,            0 },
-    { X86::RSQRTSSr_Int,    X86::RSQRTSSm_Int,        TB_NO_REVERSE },
     { X86::SQRTPDr,         X86::SQRTPDm,             TB_ALIGN_16 },
     { X86::SQRTPSr,         X86::SQRTPSm,             TB_ALIGN_16 },
     { X86::SQRTSDr,         X86::SQRTSDm,             0 },
-    { X86::SQRTSDr_Int,     X86::SQRTSDm_Int,         TB_NO_REVERSE },
     { X86::SQRTSSr,         X86::SQRTSSm,             0 },
-    { X86::SQRTSSr_Int,     X86::SQRTSSm_Int,         TB_NO_REVERSE },
     // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
     { X86::UCOMISDrr,       X86::UCOMISDrm,           0 },
     { X86::UCOMISDrr_Int,   X86::UCOMISDrm_Int,       TB_NO_REVERSE },
@@ -1370,14 +1366,18 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::PUNPCKLQDQrr,    X86::PUNPCKLQDQrm,  TB_ALIGN_16 },
     { X86::PUNPCKLWDrr,     X86::PUNPCKLWDrm,   TB_ALIGN_16 },
     { X86::PXORrr,          X86::PXORrm,        TB_ALIGN_16 },
+    { X86::RCPSSr_Int,      X86::RCPSSm_Int,    TB_NO_REVERSE },
     { X86::ROUNDSDr_Int,    X86::ROUNDSDm_Int,  TB_NO_REVERSE },
     { X86::ROUNDSSr_Int,    X86::ROUNDSSm_Int,  TB_NO_REVERSE },
+    { X86::RSQRTSSr_Int,    X86::RSQRTSSm_Int,  TB_NO_REVERSE },
     { X86::SBB16rr,         X86::SBB16rm,       0 },
     { X86::SBB32rr,         X86::SBB32rm,       0 },
     { X86::SBB64rr,         X86::SBB64rm,       0 },
     { X86::SBB8rr,          X86::SBB8rm,        0 },
     { X86::SHUFPDrri,       X86::SHUFPDrmi,     TB_ALIGN_16 },
     { X86::SHUFPSrri,       X86::SHUFPSrmi,     TB_ALIGN_16 },
+    { X86::SQRTSDr_Int,     X86::SQRTSDm_Int,   TB_NO_REVERSE },
+    { X86::SQRTSSr_Int,     X86::SQRTSSm_Int,   TB_NO_REVERSE },
     { X86::SUB16rr,         X86::SUB16rm,       0 },
     { X86::SUB32rr,         X86::SUB32rm,       0 },
     { X86::SUB64rr,         X86::SUB64rm,       0 },

Modified: llvm/trunk/test/CodeGen/X86/stack-folding-fp-sse42.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-fp-sse42.ll?rev=334648&r1=334647&r2=334648&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-fp-sse42.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-fp-sse42.ll Wed Jun 13 13:03:42 2018
@@ -964,7 +964,17 @@ define <4 x float> @stack_fold_rcpps_int
 declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
 
 ; TODO stack_fold_rcpss
-; TODO stack_fold_rcpss_int
+
+define <4 x float> @stack_fold_rcpss_int(<4 x float> %a0, <4 x float> %a1) optsize {
+  ;CHECK-LABEL: stack_fold_rcpss_int
+  ;CHECK:       rcpss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %2 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a1)
+  %3 = extractelement <4 x float> %2, i32 0
+  %4 = insertelement <4 x float> %a0, float %3, i32 0
+  ret <4 x float> %4
+}
+declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>)
 
 define <2 x double> @stack_fold_roundpd(<2 x double> %a0) {
   ;CHECK-LABEL: stack_fold_roundpd
@@ -1032,7 +1042,17 @@ define <4 x float> @stack_fold_rsqrtps_i
 declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
 
 ; TODO stack_fold_rsqrtss
-; TODO stack_fold_rsqrtss_int
+
+define <4 x float> @stack_fold_rsqrtss_int(<4 x float> %a0, <4 x float> %a1) optsize {
+  ;CHECK-LABEL: stack_fold_rsqrtss_int
+  ;CHECK:       rsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %2 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a1)
+  %3 = extractelement <4 x float> %2, i32 0
+  %4 = insertelement <4 x float> %a0, float %3, i32 0
+  ret <4 x float> %4
+}
+declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>)
 
 define <2 x double> @stack_fold_shufpd(<2 x double> %a0, <2 x double> %a1) {
   ;CHECK-LABEL: stack_fold_shufpd
@@ -1079,7 +1099,15 @@ define double @stack_fold_sqrtsd(double
 }
 declare double @llvm.sqrt.f64(double) nounwind readnone
 
-; TODO stack_fold_sqrtsd_int
+define <2 x double> @stack_fold_sqrtsd_int(<2 x double> %a0, <2 x double> %a1) optsize {
+  ;CHECK-LABEL: stack_fold_sqrtsd_int
+  ;CHECK:       sqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %2 = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a1)
+  %3 = extractelement <2 x double> %2, i32 0
+  %4 = insertelement <2 x double> %a0, double %3, i32 0
+  ret <2 x double> %4
+}
 declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
 
 define float @stack_fold_sqrtss(float %a0) minsize {
@@ -1091,8 +1119,16 @@ define float @stack_fold_sqrtss(float %a
 }
 declare float @llvm.sqrt.f32(float) nounwind readnone
 
-; TODO stack_fold_sqrtss_int
-declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
+define <4 x float> @stack_fold_sqrtss_int(<4 x float> %a0, <4 x float> %a1) optsize {
+  ;CHECK-LABEL: stack_fold_sqrtss_int
+  ;CHECK:       sqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a1)
+  %3 = extractelement <4 x float> %2, i32 0
+  %4 = insertelement <4 x float> %a0, float %3, i32 0
+  ret <4 x float> %4
+}
+declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>)
 
 define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) {
   ;CHECK-LABEL: stack_fold_subpd




More information about the llvm-commits mailing list