[llvm] r226676 - [X86] Declare SSE4.1/AVX2 vector extloads covered by PMOV[SZ]X legal.

Ahmed Bougacha ahmed.bougacha at gmail.com
Wed Jan 21 09:07:06 PST 2015


Author: ab
Date: Wed Jan 21 11:07:06 2015
New Revision: 226676

URL: http://llvm.org/viewvc/llvm-project?rev=226676&view=rev
Log:
[X86] Declare SSE4.1/AVX2 vector extloads covered by PMOV[SZ]X legal.

Now that we can fully specify extload legality, we can declare them
legal for the PMOVSX/PMOVZX instructions.  This, for instance, enables
a DAGCombine to fire on code such as
  (and (<zextload-equivalent> ...), <redundant mask>)
to turn it into:
  (zextload ...)
as seen in the testcase changes.
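
For illustration (not part of this patch), a minimal C++ sketch of the
legality query that gates this kind of fold.  The helper and its name
are made up; TargetLowering::isLoadExtLegal and ISD::ZEXTLOAD are the
real APIs, and the include path assumes an LLVM tree of this era.

  #include "llvm/Target/TargetLowering.h"

  using namespace llvm;

  // Returns true if (and (load MemVT -> VT), <mask clearing the high
  // bits>) may be rewritten as (zextload MemVT -> VT) on this target.
  // Hypothetical helper, for illustration only.
  static bool canFoldMaskedLoadToZExtLoad(const TargetLowering &TLI,
                                          EVT VT, EVT MemVT) {
    // Before this patch, X86 did not mark e.g. {v4i32, v4i8} ZEXTLOAD
    // as Legal, so a combine like this never fired and the redundant
    // 'pand' survived.
    return TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT);
  }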

There is one regression, in widen_load-2.ll: we're no longer able
to do store-to-load forwarding with illegal extload memory types.
This will be addressed separately.

Differential Revision: http://reviews.llvm.org/D6533

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/test/CodeGen/X86/avx2-pmovx-256-old-shuffle.ll
    llvm/trunk/test/CodeGen/X86/pointer-vector.ll
    llvm/trunk/test/CodeGen/X86/widen_load-2.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=226676&r1=226675&r2=226676&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jan 21 11:07:06 2015
@@ -1136,6 +1136,21 @@ void X86TargetLowering::resetOperationAc
       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
     }
 
+    // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
+    setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, MVT::v8i8,  Legal);
+    setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8,  Legal);
+    setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i8,  Legal);
+    setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
+    setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
+    setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
+
+    setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i16, MVT::v8i8,  Legal);
+    setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8,  Legal);
+    setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i8,  Legal);
+    setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
+    setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
+    setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
+
     // i8 and i16 vectors are custom because the source register and
     // source memory operand types are not the same width.  f32 vectors are
     // custom since the immediate controlling the insert encodes additional
@@ -1315,6 +1330,21 @@ void X86TargetLowering::resetOperationAc
 
       // Custom CTPOP always performs better on natively supported v8i32
       setOperationAction(ISD::CTPOP,             MVT::v8i32, Custom);
+
+      // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
+      setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
+      setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32,  MVT::v8i8,  Legal);
+      setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64,  MVT::v4i8,  Legal);
+      setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32,  MVT::v8i16, Legal);
+      setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64,  MVT::v4i16, Legal);
+      setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64,  MVT::v4i32, Legal);
+
+      setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
+      setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32,  MVT::v8i8,  Legal);
+      setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64,  MVT::v4i8,  Legal);
+      setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32,  MVT::v8i16, Legal);
+      setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64,  MVT::v4i16, Legal);
+      setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64,  MVT::v4i32, Legal);
     } else {
       setOperationAction(ISD::ADD,             MVT::v4i64, Custom);
       setOperationAction(ISD::ADD,             MVT::v8i32, Custom);

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=226676&r1=226675&r2=226676&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed Jan 21 11:07:06 2015
@@ -6120,7 +6120,7 @@ defm WQ : SS41I_pmovx_rm<0x24, "wq", i32
 defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem>;
 
 // AVX2 Patterns
-multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, SDNode ExtOp> {
+multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, SDNode ExtOp> {
   // Register-Register patterns
   def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
             (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;
@@ -6154,6 +6154,22 @@ multiclass SS41I_pmovx_avx2_patterns<str
   def : Pat<(v4i64 (ExtOp (v8i32 VR256:$src))),
             (!cast<I>(OpcPrefix#DQYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
 
+  // Simple Register-Memory patterns
+  def : Pat<(v16i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
+            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
+  def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
+            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
+  def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
+            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
+
+  def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
+            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
+  def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
+            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
+
+  def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
+            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
+
   // AVX2 Register-Memory patterns
   def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
             (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
@@ -6211,13 +6227,13 @@ multiclass SS41I_pmovx_avx2_patterns<str
 }
 
 let Predicates = [HasAVX2] in {
-  defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", X86vsext>;
-  defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", X86vzext>;
+  defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", X86vsext>;
+  defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", X86vzext>;
 }
 
 // SSE4.1/AVX patterns.
-multiclass SS41I_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
-                                PatFrag ExtLoad16> {
+multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
+                                SDNode ExtOp, PatFrag ExtLoad16> {
   def : Pat<(v8i16 (ExtOp (v16i8 VR128:$src))),
             (!cast<I>(OpcPrefix#BWrr) VR128:$src)>;
   def : Pat<(v4i32 (ExtOp (v16i8 VR128:$src))),
@@ -6233,6 +6249,21 @@ multiclass SS41I_pmovx_patterns<string O
   def : Pat<(v2i64 (ExtOp (v4i32 VR128:$src))),
             (!cast<I>(OpcPrefix#DQrr) VR128:$src)>;
 
+  def : Pat<(v8i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
+            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
+  def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
+            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
+  def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
+            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
+
+  def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
+            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
+  def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
+            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
+
+  def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
+            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
+
   def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
             (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
   def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
@@ -6295,13 +6326,13 @@ multiclass SS41I_pmovx_patterns<string O
 }
 
 let Predicates = [HasAVX] in {
-  defm : SS41I_pmovx_patterns<"VPMOVSX", X86vsext, extloadi32i16>;
-  defm : SS41I_pmovx_patterns<"VPMOVZX", X86vzext, loadi16_anyext>;
+  defm : SS41I_pmovx_patterns<"VPMOVSX", "s", X86vsext, extloadi32i16>;
+  defm : SS41I_pmovx_patterns<"VPMOVZX", "z", X86vzext, loadi16_anyext>;
 }
 
 let Predicates = [UseSSE41] in {
-  defm : SS41I_pmovx_patterns<"PMOVSX", X86vsext, extloadi32i16>;
-  defm : SS41I_pmovx_patterns<"PMOVZX", X86vzext, loadi16_anyext>;
+  defm : SS41I_pmovx_patterns<"PMOVSX", "s", X86vsext, extloadi32i16>;
+  defm : SS41I_pmovx_patterns<"PMOVZX", "z", X86vzext, loadi16_anyext>;
 }
 
 //===----------------------------------------------------------------------===//

Modified: llvm/trunk/test/CodeGen/X86/avx2-pmovx-256-old-shuffle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-pmovx-256-old-shuffle.ll?rev=226676&r1=226675&r2=226676&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-pmovx-256-old-shuffle.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-pmovx-256-old-shuffle.ll Wed Jan 21 11:07:06 2015
@@ -15,8 +15,6 @@ define void @test_avx2_pmovx_256(<8 x i8
 ; CHECK-LABEL: test_avx2_pmovx_256
 ; We really don't care about the generated code.
 ; CHECK: vpmovzxbd
-; CHECK: vpbroadcastd
-; CHECK: vpand
 ; CHECK: vcvtdq2ps
 ; CHECK: vmovups
 ; CHECK: vzeroupper

Modified: llvm/trunk/test/CodeGen/X86/pointer-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pointer-vector.ll?rev=226676&r1=226675&r2=226676&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pointer-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pointer-vector.ll Wed Jan 21 11:07:06 2015
@@ -81,8 +81,7 @@ define <4 x i32*> @INT2PTR1(<4 x i8>* %p
 entry:
   %G = load <4 x i8>* %p
 ;CHECK: movl
-;CHECK: pmovzxbd
-;CHECK: pand
+;CHECK: pmovzxbd (%
   %K = inttoptr <4 x i8> %G to <4 x i32*>
 ;CHECK: ret
   ret <4 x i32*> %K

Modified: llvm/trunk/test/CodeGen/X86/widen_load-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_load-2.ll?rev=226676&r1=226675&r2=226676&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_load-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_load-2.ll Wed Jan 21 11:07:06 2015
@@ -191,8 +191,9 @@ define void @rot(%i8vec3pack* nocapture
 ; CHECK-NEXT:    movd    %[[CONSTANT1]], %e[[R1:[abcd]]]x
 ; CHECK-NEXT:    movw    %[[R1]]x, (%[[PTR1:.*]])
 ; CHECK-NEXT:    movb    $1, 2(%[[PTR1]])
-; CHECK-NEXT:    pmovzxbd (%[[PTR0]]), %[[X0:xmm[0-9]+]]
-; CHECK-NEXT:    pand    {{.*}}, %[[X0]]
+; CHECK-NEXT:    movl    (%[[PTR0]]), [[TMP1:%e[abcd]+x]]
+; CHECK-NEXT:    movl    [[TMP1]], [[TMP2:.*]]
+; CHECK-NEXT:    pmovzxbd [[TMP2]], %[[X0:xmm[0-9]+]]
 ; CHECK-NEXT:    pextrd  $1, %[[X0]], %e[[R0:[abcd]]]x
 ; CHECK-NEXT:    shrl    %e[[R0]]x
 ; CHECK-NEXT:    movd    %[[X0]], %e[[R1:[abcd]]]x
