[llvm] r271481 - [X86] Add AVX 256-bit load and stores to fast isel.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 1 21:19:45 PDT 2016


Author: ctopper
Date: Wed Jun  1 23:19:45 2016
New Revision: 271481

URL: http://llvm.org/viewvc/llvm-project?rev=271481&view=rev
Log:
[X86] Add AVX 256-bit load and stores to fast isel.

I'm not sure why this was missing for so long.

This also exposed that we were picking the floating-point 256-bit VMOVNTPS for some integer types in normal isel on AVX1, even though VMOVNTDQ is available. In practice it doesn't matter, thanks to the execution dependency fix pass, but it does require extra isel patterns. That will be fixed in a follow-up commit. For context, the kind of IR affected is sketched right below.
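
A minimal sketch in the spirit of the existing cases in fast-isel-nontemporal.ll (the function name below is made up for illustration, and the RUN invocation is omitted):

define void @sketch_nt8xi32(<8 x i32>* nocapture %ptr, <8 x i32> %X) {
entry:
  ; A 32-byte-aligned non-temporal 256-bit integer store. With this change
  ; fast isel handles it directly and, per the updated checks below, selects
  ; vmovntdq %ymm0, (%rdi) on AVX targets instead of vmovntps.
  store <8 x i32> %X, <8 x i32>* %ptr, align 32, !nontemporal !1
  ret void
}

!1 = !{i32 1}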

Modified:
    llvm/trunk/lib/Target/X86/X86FastISel.cpp
    llvm/trunk/test/CodeGen/X86/fast-isel-nontemporal.ll

Modified: llvm/trunk/lib/Target/X86/X86FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FastISel.cpp?rev=271481&r1=271480&r2=271481&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86FastISel.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86FastISel.cpp Wed Jun  1 23:19:45 2016
@@ -348,6 +348,7 @@ bool X86FastISel::isTypeLegal(Type *Ty,
 bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
                                   MachineMemOperand *MMO, unsigned &ResultReg,
                                   unsigned Alignment) {
+  bool HasAVX = Subtarget->hasAVX();
   // Get opcode and regclass of the output for the given load instruction.
   unsigned Opc = 0;
   const TargetRegisterClass *RC = nullptr;
@@ -373,7 +374,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT
     break;
   case MVT::f32:
     if (X86ScalarSSEf32) {
-      Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
+      Opc = HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
       RC  = &X86::FR32RegClass;
     } else {
       Opc = X86::LD_Fp32m;
@@ -382,7 +383,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT
     break;
   case MVT::f64:
     if (X86ScalarSSEf64) {
-      Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
+      Opc = HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
       RC  = &X86::FR64RegClass;
     } else {
       Opc = X86::LD_Fp64m;
@@ -394,16 +395,16 @@ bool X86FastISel::X86FastEmitLoad(EVT VT
     return false;
   case MVT::v4f32:
     if (Alignment >= 16)
-      Opc = Subtarget->hasAVX() ? X86::VMOVAPSrm : X86::MOVAPSrm;
+      Opc = HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
     else
-      Opc = Subtarget->hasAVX() ? X86::VMOVUPSrm : X86::MOVUPSrm;
+      Opc = HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
     RC  = &X86::VR128RegClass;
     break;
   case MVT::v2f64:
     if (Alignment >= 16)
-      Opc = Subtarget->hasAVX() ? X86::VMOVAPDrm : X86::MOVAPDrm;
+      Opc = HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
     else
-      Opc = Subtarget->hasAVX() ? X86::VMOVUPDrm : X86::MOVUPDrm;
+      Opc = HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
     RC  = &X86::VR128RegClass;
     break;
   case MVT::v4i32:
@@ -411,11 +412,29 @@ bool X86FastISel::X86FastEmitLoad(EVT VT
   case MVT::v8i16:
   case MVT::v16i8:
     if (Alignment >= 16)
-      Opc = Subtarget->hasAVX() ? X86::VMOVDQArm : X86::MOVDQArm;
+      Opc = HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
     else
-      Opc = Subtarget->hasAVX() ? X86::VMOVDQUrm : X86::MOVDQUrm;
+      Opc = HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
     RC  = &X86::VR128RegClass;
     break;
+  case MVT::v8f32:
+    assert(HasAVX);
+    Opc = (Alignment >= 32) ? X86::VMOVAPSYrm : X86::VMOVUPSYrm;
+    RC  = &X86::VR256RegClass;
+    break;
+  case MVT::v4f64:
+    assert(HasAVX);
+    Opc = (Alignment >= 32) ? X86::VMOVAPDYrm : X86::VMOVUPDYrm;
+    RC  = &X86::VR256RegClass;
+    break;
+  case MVT::v8i32:
+  case MVT::v4i64:
+  case MVT::v16i16:
+  case MVT::v32i8:
+    assert(HasAVX);
+    Opc = (Alignment >= 32) ? X86::VMOVDQAYrm : X86::VMOVDQUYrm;
+    RC  = &X86::VR256RegClass;
+    break;
   }
 
   ResultReg = createResultReg(RC);
@@ -508,7 +527,31 @@ bool X86FastISel::X86FastEmitStore(EVT V
       else
         Opc = HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
     } else
-      Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr;
+      Opc = HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
+    break;
+  case MVT::v8f32:
+    assert(HasAVX);
+    if (Aligned)
+      Opc = IsNonTemporal ? X86::VMOVNTPSYmr : X86::VMOVAPSYmr;
+    else
+      Opc = X86::VMOVUPSYmr;
+    break;
+  case MVT::v4f64:
+    assert(HasAVX);
+    if (Aligned) {
+      Opc = IsNonTemporal ? X86::VMOVNTPDYmr : X86::VMOVAPDYmr;
+    } else
+      Opc = X86::VMOVUPDYmr;
+    break;
+  case MVT::v8i32:
+  case MVT::v4i64:
+  case MVT::v16i16:
+  case MVT::v32i8:
+    assert(HasAVX);
+    if (Aligned)
+      Opc = IsNonTemporal ? X86::VMOVNTDQYmr : X86::VMOVDQAYmr;
+    else
+      Opc = X86::VMOVDQUYmr;
     break;
   }
 

Modified: llvm/trunk/test/CodeGen/X86/fast-isel-nontemporal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-nontemporal.ll?rev=271481&r1=271480&r2=271481&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-nontemporal.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-nontemporal.ll Wed Jun  1 23:19:45 2016
@@ -260,7 +260,7 @@ define void @test_nt32xi8(<32 x i8>* noc
 ;
 ; AVX-LABEL: test_nt32xi8:
 ; AVX:       # BB#0: # %entry
-; AVX-NEXT:    vmovntps %ymm0, (%rdi)
+; AVX-NEXT:    vmovntdq %ymm0, (%rdi)
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq
 ;
@@ -282,7 +282,7 @@ define void @test_nt16xi16(<16 x i16>* n
 ;
 ; AVX-LABEL: test_nt16xi16:
 ; AVX:       # BB#0: # %entry
-; AVX-NEXT:    vmovntps %ymm0, (%rdi)
+; AVX-NEXT:    vmovntdq %ymm0, (%rdi)
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq
 ;
@@ -304,7 +304,7 @@ define void @test_nt8xi32(<8 x i32>* noc
 ;
 ; AVX-LABEL: test_nt8xi32:
 ; AVX:       # BB#0: # %entry
-; AVX-NEXT:    vmovntps %ymm0, (%rdi)
+; AVX-NEXT:    vmovntdq %ymm0, (%rdi)
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq
 ;
@@ -404,8 +404,8 @@ define void @test_nt64xi8(<64 x i8>* noc
 ;
 ; AVX-LABEL: test_nt64xi8:
 ; AVX:       # BB#0: # %entry
-; AVX-NEXT:    vmovntps %ymm0, (%rdi)
-; AVX-NEXT:    vmovntps %ymm1, 32(%rdi)
+; AVX-NEXT:    vmovntdq %ymm0, (%rdi)
+; AVX-NEXT:    vmovntdq %ymm1, 32(%rdi)
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq
 ;
@@ -435,8 +435,8 @@ define void @test_nt32xi16(<32 x i16>* n
 ;
 ; AVX-LABEL: test_nt32xi16:
 ; AVX:       # BB#0: # %entry
-; AVX-NEXT:    vmovntps %ymm0, (%rdi)
-; AVX-NEXT:    vmovntps %ymm1, 32(%rdi)
+; AVX-NEXT:    vmovntdq %ymm0, (%rdi)
+; AVX-NEXT:    vmovntdq %ymm1, 32(%rdi)
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq
 ;
@@ -466,8 +466,8 @@ define void @test_nt16xi32(<16 x i32>* n
 ;
 ; AVX-LABEL: test_nt16xi32:
 ; AVX:       # BB#0: # %entry
-; AVX-NEXT:    vmovntps %ymm0, (%rdi)
-; AVX-NEXT:    vmovntps %ymm1, 32(%rdi)
+; AVX-NEXT:    vmovntdq %ymm0, (%rdi)
+; AVX-NEXT:    vmovntdq %ymm1, 32(%rdi)
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq
 ;



