[llvm-commits] [llvm] r76843 - in /llvm/trunk: lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/vec_insertps-1.ll
Eric Christopher
echristo at apple.com
Wed Jul 22 19:23:02 PDT 2009
Author: echristo
Date: Wed Jul 22 21:22:41 2009
New Revision: 76843
URL: http://llvm.org/viewvc/llvm-project?rev=76843&view=rev
Log:
Support insertps via the intrinsic and add a couple of simple
testcases to make sure it's being generated.
Added:
llvm/trunk/test/CodeGen/X86/vec_insertps-1.ll
Modified:
llvm/trunk/lib/Target/X86/X86InstrSSE.td
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=76843&r1=76842&r2=76843&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed Jul 22 21:22:41 2009
@@ -3590,15 +3590,19 @@
defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
+// insertps has a few different modes. The first two patterns below are
+// optimized inserts that won't zero arbitrary elements in the destination
+// vector. The next one matches the intrinsic and could zero arbitrary elements
+// in the destination vector.
let Constraints = "$src1 = $dst" in {
multiclass SS41I_insertf32<bits<8> opc, string OpcodeStr> {
- def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ def match_rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, FR32:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR128:$dst,
(X86insrtps VR128:$src1, FR32:$src2, imm:$src3))]>, OpSize;
- def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ def match_rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f32mem:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
@@ -3608,6 +3612,14 @@
}
}
+// Register-register form that matches the int_x86_sse41_insertps intrinsic
+// directly; unlike the optimized insert patterns, its immediate could zero
+// arbitrary elements of the destination vector.
+// OpSize is required here just as on the other opcode-0x21 definitions:
+// insertps is encoded with the 0x66 prefix (66 0F 3A 21 /r ib).
+let Constraints = "$src1 = $dst" in {
+ def INSERTPSrr : SS4AIi8<0x21, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ "insertps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst, (int_x86_sse41_insertps VR128:$src1,
+ VR128:$src2, imm:$src3))]>, OpSize;
+}
+
defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
let Defs = [EFLAGS] in {
Added: llvm/trunk/test/CodeGen/X86/vec_insertps-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_insertps-1.ll?rev=76843&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_insertps-1.ll (added)
+++ llvm/trunk/test/CodeGen/X86/vec_insertps-1.ll Wed Jul 22 21:22:41 2009
@@ -0,0 +1,13 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 | grep insertps | count 2
+
+define <4 x float> @t1(<4 x float> %t1, <4 x float> %t2) nounwind {
+ %tmp1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %t1, <4 x float> %t2, i32 1) nounwind readnone
+ ret <4 x float> %tmp1
+}
+
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
+
+define <4 x float> @t2(<4 x float> %t1, float %t2) nounwind {
+ %tmp1 = insertelement <4 x float> %t1, float %t2, i32 0
+ ret <4 x float> %tmp1
+}
\ No newline at end of file
More information about the llvm-commits
mailing list