[llvm] r362440 - [X86] Fix the pattern for merge masked vcvtps2pd.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 3 12:29:14 PDT 2019


Author: ctopper
Date: Mon Jun  3 12:29:14 2019
New Revision: 362440

URL: http://llvm.org/viewvc/llvm-project?rev=362440&view=rev
Log:
[X86] Fix the pattern for merge masked vcvtps2pd.

r362199 fixed the pattern for zero masking, but not for merge masking.
The load folding in the peephole pass hid the bug. This patch turns off
the peephole pass in the relevant test to ensure coverage.
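
For context, the two masking forms differ only in the value taken by lanes
where the mask bit is false. A minimal IR sketch of the distinction (function
names here are illustrative; the new tests below exercise the same pattern
with masks produced by fcmp):

define <4 x double> @merge_mask_sketch(<4 x float>* %p, <4 x i1> %mask, <4 x double> %passthru) {
  %v = load <4 x float>, <4 x float>* %p
  %ext = fpext <4 x float> %v to <4 x double>
  ; Merge masking: false lanes keep the values from %passthru.
  %c = select <4 x i1> %mask, <4 x double> %ext, <4 x double> %passthru
  ret <4 x double> %c
}

define <4 x double> @zero_mask_sketch(<4 x float>* %p, <4 x i1> %mask) {
  %v = load <4 x float>, <4 x float>* %p
  %ext = fpext <4 x float> %v to <4 x double>
  ; Zero masking: false lanes become zero.
  %z = select <4 x i1> %mask, <4 x double> %ext, <4 x double> zeroinitializer
  ret <4 x double> %z
}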

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/avx512-cvt.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=362440&r1=362439&r2=362440&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Mon Jun  3 12:29:14 2019
@@ -7629,10 +7629,7 @@ multiclass avx512_vcvt_fp<bits<8> opc, s
                          (ins MaskRC:$mask, MemOp:$src),
                          OpcodeStr#Alias, "$src", "$src",
                          LdDAG,
-                         (vselect MaskRC:$mask,
-                                  (_.VT (OpNode (_Src.VT
-                                                 (_Src.LdFrag addr:$src)))),
-                                  _.RC:$src0),
+                         (vselect MaskRC:$mask, LdDAG, _.RC:$src0),
                          vselect, "$src0 = $dst">,
                          EVEX, Sched<[sched.Folded]>;
 

Modified: llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-cvt.ll?rev=362440&r1=362439&r2=362440&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cvt.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-cvt.ll Mon Jun  3 12:29:14 2019
@@ -1,11 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=KNL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLBW --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl  | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLNOBW --check-prefix=AVX512VL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq  | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=DQNOVL --check-prefix=AVX512DQ
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw  | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=AVX512BW
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512dq  | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLNOBW --check-prefix=AVX512VLDQ
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw  | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLBW --check-prefix=AVX512VLBW
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=KNL
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLBW --check-prefix=SKX
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl  | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLNOBW --check-prefix=AVX512VL
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512dq  | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=DQNOVL --check-prefix=AVX512DQ
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512bw  | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=AVX512BW
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512dq  | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLNOBW --check-prefix=AVX512VLDQ
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw  | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLBW --check-prefix=AVX512VLBW
 
 
 define <16 x float> @sitof32(<16 x i32> %a) nounwind {
@@ -786,9 +786,34 @@ define <4 x double> @f32to4f64_mask(<4 x
   ret <4 x double> %c
 }
 
-define <4 x double> @f32to4f64_mask_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1) {
+define <4 x double> @f32to4f64_mask_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1, <4 x double> %passthru) {
 ; NOVL-LABEL: f32to4f64_mask_load:
 ; NOVL:       # %bb.0:
+; NOVL-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
+; NOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; NOVL-NEXT:    vcvtps2pd (%rdi), %ymm3
+; NOVL-NEXT:    vcmpltpd %zmm1, %zmm0, %k1
+; NOVL-NEXT:    vblendmpd %zmm3, %zmm2, %zmm0 {%k1}
+; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
+; NOVL-NEXT:    retq
+;
+; VL-LABEL: f32to4f64_mask_load:
+; VL:       # %bb.0:
+; VL-NEXT:    vcmpltpd %ymm1, %ymm0, %k1
+; VL-NEXT:    vcvtps2pd (%rdi), %ymm2 {%k1}
+; VL-NEXT:    vmovaps %ymm2, %ymm0
+; VL-NEXT:    retq
+  %b = load <4 x float>, <4 x float>* %p
+  %a = fpext <4 x float> %b to <4 x double>
+  %mask = fcmp ogt <4 x double> %a1, %b1
+  %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> %passthru
+  ret <4 x double> %c
+}
+
+define <4 x double> @f32to4f64_maskz_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1) {
+; NOVL-LABEL: f32to4f64_maskz_load:
+; NOVL:       # %bb.0:
 ; NOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; NOVL-NEXT:    vcvtps2pd (%rdi), %ymm2
@@ -797,7 +822,7 @@ define <4 x double> @f32to4f64_mask_load
 ; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; NOVL-NEXT:    retq
 ;
-; VL-LABEL: f32to4f64_mask_load:
+; VL-LABEL: f32to4f64_maskz_load:
 ; VL:       # %bb.0:
 ; VL-NEXT:    vcmpltpd %ymm1, %ymm0, %k1
 ; VL-NEXT:    vcvtps2pd (%rdi), %ymm0 {%k1} {z}