[llvm] r258309 - AVX512: Store (MOVNTPD, MOVNTPS, MOVNTDQ) using non-temporal hint intrinsic implementation.

Igor Breger via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 20 05:11:47 PST 2016


Author: ibreger
Date: Wed Jan 20 07:11:47 2016
New Revision: 258309

URL: http://llvm.org/viewvc/llvm-project?rev=258309&view=rev
Log:
AVX512: Store (MOVNTPD, MOVNTPS, MOVNTDQ) using non-temporal hint intrinsic implementation.

Differential Revision: http://reviews.llvm.org/D16350

Modified:
    llvm/trunk/include/llvm/IR/IntrinsicsX86.td
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
    llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll

Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=258309&r1=258308&r2=258309&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Wed Jan 20 07:11:47 2016
@@ -2030,6 +2030,18 @@ let TargetPrefix = "x86" in {  // All in
                   [IntrReadWriteArgMem]>;
 }
 
+// Store ops using non-temporal hint
+let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_avx512_storent_q_512 :
+        GCCBuiltin<"__builtin_ia32_movntdq512">,
+        Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty], [IntrReadWriteArgMem]>;
+  def int_x86_avx512_storent_pd_512 :
+        GCCBuiltin<"__builtin_ia32_movntpd512">,
+        Intrinsic<[], [llvm_ptr_ty, llvm_v8f64_ty], [IntrReadWriteArgMem]>;  
+  def int_x86_avx512_storent_ps_512 :
+        GCCBuiltin<"__builtin_ia32_movntps512">,
+        Intrinsic<[], [llvm_ptr_ty, llvm_v16f32_ty], [IntrReadWriteArgMem]>;
+}
 //===----------------------------------------------------------------------===//
 // AVX2
 

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=258309&r1=258308&r2=258309&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jan 20 07:11:47 2016
@@ -4229,10 +4229,11 @@ bool X86TargetLowering::getTgtMemIntrins
     break;
   }
   case STOREA:
+  case STOREANT:
   case STOREU: {
     Info.ptrVal = I.getArgOperand(0);
     Info.memVT = MVT::getVT(I.getArgOperand(1)->getType());
-    Info.align = (IntrData->Type == STOREA ? Info.memVT.getSizeInBits()/8 : 1);
+    Info.align = (IntrData->Type == STOREU ? 1 : Info.memVT.getSizeInBits()/8);
     Info.writeMem = true;
     break;
   }
@@ -17739,6 +17740,20 @@ static SDValue LowerINTRINSIC_W_CHAIN(SD
     return DAG.getMaskedStore(Chain, dl, Data, Addr, VMask, VT,
                               MemIntr->getMemOperand(), false);
   }
+  case STOREANT: {
+    // Store (MOVNTPD, MOVNTPS, MOVNTDQ) using non-temporal hint intrinsic implementation. 
+    SDValue Data = Op.getOperand(3);
+    SDValue Addr = Op.getOperand(2);
+    SDValue Chain = Op.getOperand(0);
+
+    MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
+    assert(MemIntr && "Expected MemIntrinsicSDNode!");
+    MachineMemOperand *MMO = MemIntr->getMemOperand();
+
+    MMO->setFlags(MachineMemOperand::MONonTemporal);
+
+    return DAG.getStore(Chain, dl, Data, Addr, MMO);
+  }
   }
 }
 

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=258309&r1=258308&r2=258309&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Jan 20 07:11:47 2016
@@ -3194,50 +3194,31 @@ let SchedRW = [WriteLoad] in {
   }
 }
 
-multiclass avx512_movnt<bits<8> opc, string OpcodeStr, PatFrag st_frag,
-                        ValueType OpVT, RegisterClass RC, X86MemOperand memop,
-                        Domain d, InstrItinClass itin = IIC_SSE_MOVNT> {
+multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+                        PatFrag st_frag = alignednontemporalstore,
+                        InstrItinClass itin = IIC_SSE_MOVNT> {
   let SchedRW = [WriteStore], mayStore = 1,
       AddedComplexity = 400 in
-  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
+  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                    [(st_frag (OpVT RC:$src), addr:$dst)], d, itin>, EVEX;
+                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
+                    _.ExeDomain, itin>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
 }
 
-multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr, PatFrag st_frag,
-                           string elty, string elsz, string vsz512,
-                           string vsz256, string vsz128, Domain d,
-                           Predicate prd, InstrItinClass itin = IIC_SSE_MOVNT> {
-  let Predicates = [prd] in
-  defm Z : avx512_movnt<opc, OpcodeStr, st_frag,
-                        !cast<ValueType>("v"##vsz512##elty##elsz), VR512,
-                        !cast<X86MemOperand>(elty##"512mem"), d, itin>,
-                        EVEX_V512;
-
-  let Predicates = [prd, HasVLX] in {
-    defm Z256 : avx512_movnt<opc, OpcodeStr, st_frag,
-                             !cast<ValueType>("v"##vsz256##elty##elsz), VR256X,
-                             !cast<X86MemOperand>(elty##"256mem"), d, itin>,
-                             EVEX_V256;
-
-    defm Z128 : avx512_movnt<opc, OpcodeStr, st_frag,
-                             !cast<ValueType>("v"##vsz128##elty##elsz), VR128X,
-                             !cast<X86MemOperand>(elty##"128mem"), d, itin>,
-                             EVEX_V128;
+multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
+                                                  AVX512VLVectorVTInfo VTInfo> {
+  let Predicates = [HasAVX512] in
+    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
+
+  let Predicates = [HasAVX512, HasVLX] in {
+    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
+    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
   }
 }
 
-defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", alignednontemporalstore,
-                                "i", "64", "8", "4", "2", SSEPackedInt,
-                                HasAVX512>, PD, EVEX_CD8<64, CD8VF>;
-
-defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", alignednontemporalstore,
-                                "f", "64", "8", "4", "2", SSEPackedDouble,
-                                HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", alignednontemporalstore,
-                                "f", "32", "16", "8", "4", SSEPackedSingle,
-                                HasAVX512>, PS, EVEX_CD8<32, CD8VF>;
+defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info>, PD;
+defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info>, PD, VEX_W;
+defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info>, PS;
 
 //===----------------------------------------------------------------------===//
 // AVX-512 - Integer arithmetic

Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=258309&r1=258308&r2=258309&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Wed Jan 20 07:11:47 2016
@@ -29,7 +29,7 @@ enum IntrinsicType {
   INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM,
   COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC,
   TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
-  EXPAND_FROM_MEM, LOADA, LOADU, STOREA, STOREU, BLEND, INSERT_SUBVEC,
+  EXPAND_FROM_MEM, LOADA, LOADU, STOREA, STOREU, STOREANT, BLEND, INSERT_SUBVEC,
   TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
   FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK
 };
@@ -260,7 +260,9 @@ static const IntrinsicData IntrinsicsWit
   X86_INTRINSIC_DATA(avx512_scattersiv4_si, SCATTER, X86::VPSCATTERDDZ128mr, 0),
   X86_INTRINSIC_DATA(avx512_scattersiv8_sf, SCATTER, X86::VSCATTERDPSZ256mr, 0),
   X86_INTRINSIC_DATA(avx512_scattersiv8_si, SCATTER, X86::VPSCATTERDDZ256mr, 0),
-
+  X86_INTRINSIC_DATA(avx512_storent_pd_512, STOREANT, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_storent_ps_512, STOREANT, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_storent_q_512,  STOREANT, ISD::DELETED_NODE, 0),
   X86_INTRINSIC_DATA(rdpmc,     RDPMC,  X86ISD::RDPMC_DAG, 0),
   X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0),
   X86_INTRINSIC_DATA(rdrand_32, RDRAND, X86ISD::RDRAND, 0),

Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=258309&r1=258308&r2=258309&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Wed Jan 20 07:11:47 2016
@@ -7176,3 +7176,35 @@ define <2 x double>@test_int_x86_avx512_
   ret <2 x double> %res4
 }
 
+declare void @llvm.x86.avx512.storent.q.512(i8*, <8 x i64>)
+
+define void @test_storent_q_512(<8 x i64> %data, i8* %ptr) {
+; CHECK-LABEL: test_storent_q_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vmovntdq %zmm0, (%rdi)
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.storent.q.512(i8* %ptr, <8 x i64> %data)
+  ret void
+}
+
+declare void @llvm.x86.avx512.storent.pd.512(i8*, <8 x double>)
+
+define void @test_storent_pd_512(<8 x double> %data, i8* %ptr) {
+; CHECK-LABEL: test_storent_pd_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vmovntpd %zmm0, (%rdi)
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.storent.pd.512(i8* %ptr, <8 x double> %data)
+  ret void
+}
+
+declare void @llvm.x86.avx512.storent.ps.512(i8*, <16 x float>)
+
+define void @test_storent_ps_512(<16 x float> %data, i8* %ptr) {
+; CHECK-LABEL: test_storent_ps_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vmovntps %zmm0, (%rdi)
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.storent.ps.512(i8* %ptr, <16 x float> %data)
+  ret void
+}




More information about the llvm-commits mailing list