[llvm] r360915 - [X86] Use 0x9 instead of 0x1 as the immediate in some masked floor patterns. Similarly change 0x2 to 0xA for ceil.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu May 16 09:53:50 PDT 2019


Author: ctopper
Date: Thu May 16 09:53:50 2019
New Revision: 360915

URL: http://llvm.org/viewvc/llvm-project?rev=360915&view=rev
Log:
[X86] Use 0x9 instead of 0x1 as the immediate in some masked floor patterns. Similarly change 0x2 to 0xA for ceil.

This sets bit 3 of the immediate, which suppresses the precision (inexact) exception; that is what we should be doing
for ceil and floor. We already use the correct immediate in the patterns without masking.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/vec_floor.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=360915&r1=360914&r2=360915&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Thu May 16 09:53:50 2019
@@ -9410,13 +9410,13 @@ multiclass avx512_masked_scalar_imm<SDNo
 }
 
 defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESS", X86Movss,
-                                v4f32x_info, fp32imm0, 0x01, HasAVX512>;
+                                v4f32x_info, fp32imm0, 0x09, HasAVX512>;
 defm : avx512_masked_scalar_imm<fceil, "RNDSCALESS", X86Movss,
-                                v4f32x_info, fp32imm0, 0x02, HasAVX512>;
+                                v4f32x_info, fp32imm0, 0x0A, HasAVX512>;
 defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESD", X86Movsd,
-                                v2f64x_info, fp64imm0, 0x01, HasAVX512>;
+                                v2f64x_info, fp64imm0, 0x09, HasAVX512>;
 defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd,
-                                v2f64x_info, fp64imm0, 0x02,  HasAVX512>;
+                                v2f64x_info, fp64imm0, 0x0A,  HasAVX512>;
 
 
 //-------------------------------------------------

Modified: llvm/trunk/test/CodeGen/X86/vec_floor.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_floor.ll?rev=360915&r1=360914&r2=360915&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_floor.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_floor.ll Thu May 16 09:53:50 2019
@@ -1374,7 +1374,7 @@ define <4 x float> @floor_mask_ss(<4 x f
 ; AVX512-LABEL: floor_mask_ss:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscaless $1, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT:    vrndscaless $9, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovaps %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %mask = and i8 %k, 1
@@ -1415,7 +1415,7 @@ define <4 x float> @floor_maskz_ss(<4 x
 ; AVX512-LABEL: floor_maskz_ss:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscaless $1, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vrndscaless $9, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
   %mask = and i8 %k, 1
   %nmask = icmp eq i8 %mask, 0
@@ -1452,7 +1452,7 @@ define <2 x double> @floor_mask_sd(<2 x
 ; AVX512-LABEL: floor_mask_sd:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscalesd $1, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT:    vrndscalesd $9, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovapd %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %mask = and i8 %k, 1
@@ -1493,7 +1493,7 @@ define <2 x double> @floor_maskz_sd(<2 x
 ; AVX512-LABEL: floor_maskz_sd:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscalesd $1, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vrndscalesd $9, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
   %mask = and i8 %k, 1
   %nmask = icmp eq i8 %mask, 0
@@ -1530,7 +1530,7 @@ define <4 x float> @floor_mask_ss_trunc(
 ; AVX512-LABEL: floor_mask_ss_trunc:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscaless $1, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT:    vrndscaless $9, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovaps %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %mask = trunc i16 %k to i1
@@ -1573,7 +1573,7 @@ define <4 x float> @floor_maskz_ss_trunc
 ; AVX512-LABEL: floor_maskz_ss_trunc:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscaless $1, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vrndscaless $9, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
   %mask = trunc i16 %k to i1
   %s = extractelement <4 x float> %x, i64 0
@@ -1609,7 +1609,7 @@ define <2 x double> @floor_mask_sd_trunc
 ; AVX512-LABEL: floor_mask_sd_trunc:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscalesd $1, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT:    vrndscalesd $9, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovapd %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %mask = trunc i16 %k to i1
@@ -1652,7 +1652,7 @@ define <2 x double> @floor_maskz_sd_trun
 ; AVX512-LABEL: floor_maskz_sd_trunc:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscalesd $1, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vrndscalesd $9, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
   %mask = trunc i16 %k to i1
   %s = extractelement <2 x double> %x, i64 0
@@ -1684,7 +1684,7 @@ define <4 x float> @floor_mask_ss_mask8(
 ; AVX512-LABEL: floor_mask_ss_mask8:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    vcmpeqss %xmm1, %xmm0, %k1
-; AVX512-NEXT:    vrndscaless $1, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT:    vrndscaless $9, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovaps %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %mask1 = fcmp oeq <4 x float> %x, %y
@@ -1717,7 +1717,7 @@ define <4 x float> @floor_maskz_ss_mask8
 ; AVX512-LABEL: floor_maskz_ss_mask8:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    vcmpeqss %xmm1, %xmm0, %k1
-; AVX512-NEXT:    vrndscaless $1, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vrndscaless $9, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
   %mask1 = fcmp oeq <4 x float> %x, %y
   %mask = extractelement <4 x i1> %mask1, i64 0
@@ -1750,7 +1750,7 @@ define <2 x double> @floor_mask_sd_mask8
 ; AVX512-LABEL: floor_mask_sd_mask8:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    vcmpeqsd %xmm1, %xmm0, %k1
-; AVX512-NEXT:    vrndscalesd $1, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT:    vrndscalesd $9, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovapd %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %mask1 = fcmp oeq <2 x double> %x, %y
@@ -1783,7 +1783,7 @@ define <2 x double> @floor_maskz_sd_mask
 ; AVX512-LABEL: floor_maskz_sd_mask8:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    vcmpeqsd %xmm1, %xmm0, %k1
-; AVX512-NEXT:    vrndscalesd $1, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vrndscalesd $9, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
   %mask1 = fcmp oeq <2 x double> %x, %y
   %mask = extractelement <2 x i1> %mask1, i64 0
@@ -2350,7 +2350,7 @@ define <4 x float> @ceil_mask_ss(<4 x fl
 ; AVX512-LABEL: ceil_mask_ss:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscaless $2, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT:    vrndscaless $10, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovaps %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %mask = and i8 %k, 1
@@ -2391,7 +2391,7 @@ define <4 x float> @ceil_maskz_ss(<4 x f
 ; AVX512-LABEL: ceil_maskz_ss:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscaless $2, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vrndscaless $10, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
   %mask = and i8 %k, 1
   %nmask = icmp eq i8 %mask, 0
@@ -2428,7 +2428,7 @@ define <2 x double> @ceil_mask_sd(<2 x d
 ; AVX512-LABEL: ceil_mask_sd:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscalesd $2, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT:    vrndscalesd $10, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovapd %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %mask = and i8 %k, 1
@@ -2469,7 +2469,7 @@ define <2 x double> @ceil_maskz_sd(<2 x
 ; AVX512-LABEL: ceil_maskz_sd:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscalesd $2, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vrndscalesd $10, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
   %mask = and i8 %k, 1
   %nmask = icmp eq i8 %mask, 0
@@ -2506,7 +2506,7 @@ define <4 x float> @ceil_mask_ss_trunc(<
 ; AVX512-LABEL: ceil_mask_ss_trunc:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscaless $2, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT:    vrndscaless $10, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovaps %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %mask = trunc i16 %k to i1
@@ -2549,7 +2549,7 @@ define <4 x float> @ceil_maskz_ss_trunc(
 ; AVX512-LABEL: ceil_maskz_ss_trunc:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscaless $2, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vrndscaless $10, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
   %mask = trunc i16 %k to i1
   %s = extractelement <4 x float> %x, i64 0
@@ -2585,7 +2585,7 @@ define <2 x double> @ceil_mask_sd_trunc(
 ; AVX512-LABEL: ceil_mask_sd_trunc:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscalesd $2, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT:    vrndscalesd $10, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovapd %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %mask = trunc i16 %k to i1
@@ -2628,7 +2628,7 @@ define <2 x double> @ceil_maskz_sd_trunc
 ; AVX512-LABEL: ceil_maskz_sd_trunc:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscalesd $2, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vrndscalesd $10, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
   %mask = trunc i16 %k to i1
   %s = extractelement <2 x double> %x, i64 0
@@ -2660,7 +2660,7 @@ define <4 x float> @ceil_mask_ss_mask8(<
 ; AVX512-LABEL: ceil_mask_ss_mask8:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    vcmpeqss %xmm1, %xmm0, %k1
-; AVX512-NEXT:    vrndscaless $2, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT:    vrndscaless $10, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovaps %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %mask1 = fcmp oeq <4 x float> %x, %y
@@ -2693,7 +2693,7 @@ define <4 x float> @ceil_maskz_ss_mask8(
 ; AVX512-LABEL: ceil_maskz_ss_mask8:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    vcmpeqss %xmm1, %xmm0, %k1
-; AVX512-NEXT:    vrndscaless $2, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vrndscaless $10, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
   %mask1 = fcmp oeq <4 x float> %x, %y
   %mask = extractelement <4 x i1> %mask1, i64 0
@@ -2726,7 +2726,7 @@ define <2 x double> @ceil_mask_sd_mask8(
 ; AVX512-LABEL: ceil_mask_sd_mask8:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    vcmpeqsd %xmm1, %xmm0, %k1
-; AVX512-NEXT:    vrndscalesd $2, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT:    vrndscalesd $10, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovapd %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %mask1 = fcmp oeq <2 x double> %x, %y
@@ -2759,7 +2759,7 @@ define <2 x double> @ceil_maskz_sd_mask8
 ; AVX512-LABEL: ceil_maskz_sd_mask8:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    vcmpeqsd %xmm1, %xmm0, %k1
-; AVX512-NEXT:    vrndscalesd $2, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vrndscalesd $10, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
   %mask1 = fcmp oeq <2 x double> %x, %y
   %mask = extractelement <2 x i1> %mask1, i64 0




More information about the llvm-commits mailing list