[llvm] r373021 - [X86] Add CodeGenOnly instructions for (f32 (X86selects $mask, (loadf32 addr), fp32imm0)) to use masked MOVSS from memory.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 26 15:23:09 PDT 2019


Author: ctopper
Date: Thu Sep 26 15:23:09 2019
New Revision: 373021

URL: http://llvm.org/viewvc/llvm-project?rev=373021&view=rev
Log:
[X86] Add CodeGenOnly instructions for (f32 (X86selects $mask, (loadf32 addr), fp32imm0)) to use masked MOVSS from memory.

Similarly for f64 (using masked MOVSD), and for the case where the passthru value is a non-zero register rather than zero.

We were previously not trying to fold the load at all. Using
a CodeGenOnly instruction allows us to use FR32X/FR64X as the
register class to avoid a bunch of COPY_TO_REGCLASS.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/avx512-cmp.ll
    llvm/trunk/test/CodeGen/X86/pr38803.ll
    llvm/trunk/test/CodeGen/X86/select-of-fp-constants.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=373021&r1=373020&r2=373021&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Thu Sep 26 15:23:09 2019
@@ -3958,6 +3958,18 @@ multiclass avx512_move_scalar<string asm
                !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
                "$dst {${mask}} {z}, $src}"),
                [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
+    let isCodeGenOnly = 1 in {
+    def rmk_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst),
+                  (ins _.FRC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
+                  !strconcat(asm, "\t{$src, $dst {${mask}}|",
+                  "$dst {${mask}}, $src}"),
+                  [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
+    def rmkz_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst),
+                   (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
+                   !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
+                   "$dst {${mask}} {z}, $src}"),
+                   [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
+    }
   }
   def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
@@ -4222,16 +4234,26 @@ def : Pat<(f32 (X86selects VK1WM:$mask,
           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
            (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
 
+def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
+          (VMOVSSZrmk_alt FR32X:$src0, VK1WM:$mask, addr:$src)>;
+def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
+          (VMOVSSZrmkz_alt VK1WM:$mask, addr:$src)>;
+
 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
            (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
            VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
            (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
 
-def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fpimm0)),
+def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
            (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
 
+def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
+          (VMOVSDZrmk_alt FR64X:$src0, VK1WM:$mask, addr:$src)>;
+def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
+          (VMOVSDZrmkz_alt VK1WM:$mask, addr:$src)>;
+
 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
   def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),

Modified: llvm/trunk/test/CodeGen/X86/avx512-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-cmp.ll?rev=373021&r1=373020&r2=373021&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-cmp.ll Thu Sep 26 15:23:09 2019
@@ -70,9 +70,8 @@ define float @test5(float %p) #0 {
 ; ALL-NEXT:    retq
 ; ALL-NEXT:  LBB3_1: ## %if.end
 ; ALL-NEXT:    vcmpltss %xmm0, %xmm1, %k1
-; ALL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; ALL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; ALL-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; ALL-NEXT:    vmovss {{.*}}(%rip), %xmm0 {%k1}
 ; ALL-NEXT:    retq
 entry:
   %cmp = fcmp oeq float %p, 0.000000e+00

Modified: llvm/trunk/test/CodeGen/X86/pr38803.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr38803.ll?rev=373021&r1=373020&r2=373021&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr38803.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr38803.ll Thu Sep 26 15:23:09 2019
@@ -13,8 +13,7 @@ define float @_Z3fn2v() {
 ; CHECK-NEXT:    callq _Z1av
 ; CHECK-NEXT:    # kill: def $al killed $al def $eax
 ; CHECK-NEXT:    kmovd %eax, %k1
-; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT:    vmovss %xmm0, %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vmovss {{.*}}(%rip), %xmm0 {%k1} {z}
 ; CHECK-NEXT:    cmpl $0, {{.*}}(%rip)
 ; CHECK-NEXT:    je .LBB0_2
 ; CHECK-NEXT:  # %bb.1: # %if.then

Modified: llvm/trunk/test/CodeGen/X86/select-of-fp-constants.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/select-of-fp-constants.ll?rev=373021&r1=373020&r2=373021&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/select-of-fp-constants.ll (original)
+++ llvm/trunk/test/CodeGen/X86/select-of-fp-constants.ll Thu Sep 26 15:23:09 2019
@@ -84,10 +84,9 @@ define float @fcmp_select_fp_constants(f
 ;
 ; X64_AVX512F-LABEL: fcmp_select_fp_constants:
 ; X64_AVX512F:       # %bb.0:
-; X64_AVX512F-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X64_AVX512F-NEXT:    vcmpneqss {{.*}}(%rip), %xmm0, %k1
 ; X64_AVX512F-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64_AVX512F-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; X64_AVX512F-NEXT:    vmovss {{.*}}(%rip), %xmm0 {%k1}
 ; X64_AVX512F-NEXT:    retq
  %c = fcmp une float %x, -4.0
  %r = select i1 %c, float 42.0, float 23.0




More information about the llvm-commits mailing list