[llvm-commits] [llvm] r130791 - in /llvm/trunk: lib/Target/X86/X86InstrSSE.td lib/VMCore/AutoUpgrade.cpp test/Assembler/AutoUpgradeIntrinsics.ll test/CodeGen/X86/nontemporal.ll
Bill Wendling
isanbard at gmail.com
Tue May 3 14:11:17 PDT 2011
Author: void
Date: Tue May 3 16:11:17 2011
New Revision: 130791
URL: http://llvm.org/viewvc/llvm-project?rev=130791&view=rev
Log:
Replace the "movnt" intrinsics with a native store + nontemporal metadata bit.
<rdar://problem/8460511>
Added:
llvm/trunk/test/CodeGen/X86/nontemporal.ll
Modified:
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/VMCore/AutoUpgrade.cpp
llvm/trunk/test/Assembler/AutoUpgradeIntrinsics.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=130791&r1=130790&r2=130791&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue May 3 16:11:17 2011
@@ -1875,21 +1875,6 @@
// SSE 1 & 2 - Non-temporal stores
//===----------------------------------------------------------------------===//
-def VMOVNTPSmr_Int : VPSI<0x2B, MRMDestMem, (outs),
- (ins i128mem:$dst, VR128:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>, VEX;
-def VMOVNTPDmr_Int : VPDI<0x2B, MRMDestMem, (outs),
- (ins i128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>, VEX;
-
-let ExeDomain = SSEPackedInt in
- def VMOVNTDQmr_Int : VPDI<0xE7, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>, VEX;
-
let AddedComplexity = 400 in { // Prefer non-temporal versions
def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
@@ -1906,12 +1891,16 @@
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v2f64 VR128:$src),
addr:$dst)]>, VEX;
+
let ExeDomain = SSEPackedInt in
- def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src),
- addr:$dst)]>, VEX;
+ def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)]>, VEX;
+
+ def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (VMOVNTDQmr addr:$dst, VR128:$src)>;
def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src),
@@ -1943,18 +1932,6 @@
def : Pat<(int_x86_avx_movnt_ps_256 addr:$dst, VR256:$src),
(VMOVNTPSYmr addr:$dst, VR256:$src)>;
-def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
-def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
-
-let ExeDomain = SSEPackedInt in
-def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
-
let AddedComplexity = 400 in { // Prefer non-temporal versions
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
@@ -1972,22 +1949,19 @@
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (MOVNTDQmr addr:$dst, VR128:$src)>;
+
// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movnti\t{$src, $dst|$dst, $src}",
[(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
TB, Requires<[HasSSE2]>;
-
def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"movnti\t{$src, $dst|$dst, $src}",
[(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
TB, Requires<[HasSSE2]>;
-
}
-def MOVNTImr_Int : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
- "movnti\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
- TB, Requires<[HasSSE2]>;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Misc Instructions (No AVX form)
Modified: llvm/trunk/lib/VMCore/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/AutoUpgrade.cpp?rev=130791&r1=130790&r2=130791&view=diff
==============================================================================
--- llvm/trunk/lib/VMCore/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/VMCore/AutoUpgrade.cpp Tue May 3 16:11:17 2011
@@ -533,6 +533,13 @@
// Calls to these instructions are transformed into unaligned loads.
NewFn = 0;
return true;
+ } else if (Name.compare(5, 16, "x86.sse.movnt.ps", 16) == 0 ||
+ Name.compare(5, 17, "x86.sse2.movnt.dq", 17) == 0 ||
+ Name.compare(5, 17, "x86.sse2.movnt.pd", 17) == 0 ||
+ Name.compare(5, 17, "x86.sse2.movnt.i", 16) == 0) {
+ // Calls to these instructions are transformed into nontemporal stores.
+ NewFn = 0;
+ return true;
} else if (Name.compare(5, 17, "x86.ssse3.pshuf.w", 17) == 0) {
// This is an SSE/MMX instruction.
const Type *X86_MMXTy = VectorType::getX86_MMXTy(FTy->getContext());
@@ -975,6 +982,31 @@
// Remove intrinsic.
CI->eraseFromParent();
+ } else if (F->getName() == "llvm.x86.sse.movnt.ps" ||
+ F->getName() == "llvm.x86.sse2.movnt.dq" ||
+ F->getName() == "llvm.x86.sse2.movnt.pd" ||
+ F->getName() == "llvm.x86.sse2.movnt.i") {
+ IRBuilder<> Builder(C);
+ Builder.SetInsertPoint(CI->getParent(), CI);
+
+ Module *M = F->getParent();
+ SmallVector<Value *, 1> Elts;
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
+ MDNode *Node = MDNode::get(C, Elts);
+
+ Value *Arg0 = CI->getArgOperand(0);
+ Value *Arg1 = CI->getArgOperand(1);
+
+ // Convert the type of the pointer to a pointer to the stored type.
+ Value *BC = Builder.CreateBitCast(Arg0,
+ PointerType::getUnqual(Arg1->getType()),
+ "cast");
+ StoreInst *SI = Builder.CreateStore(Arg1, BC);
+ SI->setMetadata(M->getMDKindID("nontemporal"), Node);
+ SI->setAlignment(16);
+
+ // Remove intrinsic.
+ CI->eraseFromParent();
} else {
llvm_unreachable("Unknown function for CallInst upgrade.");
}
Modified: llvm/trunk/test/Assembler/AutoUpgradeIntrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Assembler/AutoUpgradeIntrinsics.ll?rev=130791&r1=130790&r2=130791&view=diff
==============================================================================
--- llvm/trunk/test/Assembler/AutoUpgradeIntrinsics.ll (original)
+++ llvm/trunk/test/Assembler/AutoUpgradeIntrinsics.ll Tue May 3 16:11:17 2011
@@ -10,6 +10,7 @@
; RUN: not grep {llvm\\.x86\\.sse2\\.loadu}
; RUN: llvm-as < %s | llvm-dis | \
; RUN: grep {llvm\\.x86\\.mmx\\.ps} | grep {x86_mmx} | count 16
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
declare i32 @llvm.ctpop.i28(i28 %val)
declare i32 @llvm.cttz.i29(i29 %val)
@@ -91,3 +92,20 @@
%v2 = call <2 x double> @llvm.x86.sse2.loadu.pd(double* %b)
ret void
}
+
+declare void @llvm.x86.sse.movnt.ps(i8*, <4 x float>) nounwind readnone
+declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x double>) nounwind readnone
+declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind readnone
+declare void @llvm.x86.sse2.movnt.i(i8*, i32) nounwind readnone
+
+define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D) {
+; CHECK: store{{.*}}nontemporal
+ call void @llvm.x86.sse.movnt.ps(i8* %B, <4 x float> %A)
+; CHECK: store{{.*}}nontemporal
+ call void @llvm.x86.sse2.movnt.dq(i8* %B, <2 x double> %C)
+; CHECK: store{{.*}}nontemporal
+ call void @llvm.x86.sse2.movnt.pd(i8* %B, <2 x double> %C)
+; CHECK: store{{.*}}nontemporal
+ call void @llvm.x86.sse2.movnt.i(i8* %B, i32 %D)
+ ret void
+}
Added: llvm/trunk/test/CodeGen/X86/nontemporal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/nontemporal.ll?rev=130791&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/nontemporal.ll (added)
+++ llvm/trunk/test/CodeGen/X86/nontemporal.ll Tue May 3 16:11:17 2011
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+
+define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E) {
+; CHECK: movntps
+ %cast = bitcast i8* %B to <4 x float>*
+ store <4 x float> %A, <4 x float>* %cast, align 16, !nontemporal !0
+; CHECK: movntdq
+ %cast1 = bitcast i8* %B to <2 x i64>*
+ store <2 x i64> %E, <2 x i64>* %cast1, align 16, !nontemporal !0
+; CHECK: movntpd
+ %cast2 = bitcast i8* %B to <2 x double>*
+ store <2 x double> %C, <2 x double>* %cast2, align 16, !nontemporal !0
+; CHECK: movnti
+ %cast3 = bitcast i8* %B to i32*
+ store i32 %D, i32* %cast3, align 16, !nontemporal !0
+ ret void
+}
+
+!0 = metadata !{i32 1}
More information about the llvm-commits
mailing list