[llvm-commits] [llvm] r99910 - in /llvm/trunk: include/llvm/IntrinsicsX86.td lib/Target/X86/X86InstrInfo.cpp lib/Target/X86/X86InstrSSE.td lib/VMCore/AutoUpgrade.cpp test/Bitcode/sse41_pmulld.ll test/Bitcode/sse41_pmulld.ll.bc test/CodeGen/X86/pmul.ll test/CodeGen/X86/pmulld.ll
Eric Christopher
echristo at apple.com
Tue Mar 30 11:49:01 PDT 2010
Author: echristo
Date: Tue Mar 30 13:49:01 2010
New Revision: 99910
URL: http://llvm.org/viewvc/llvm-project?rev=99910&view=rev
Log:
Remove the pmulld intrinsic and auto-upgrade calls to it into a vector multiply.
Rewrite the pmulld patterns, and make sure that loads of
arguments are folded into the instruction.
Added:
llvm/trunk/test/Bitcode/sse41_pmulld.ll
llvm/trunk/test/Bitcode/sse41_pmulld.ll.bc (with props)
llvm/trunk/test/CodeGen/X86/pmulld.ll
Modified:
llvm/trunk/include/llvm/IntrinsicsX86.td
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/VMCore/AutoUpgrade.cpp
llvm/trunk/test/CodeGen/X86/pmul.ll
Modified: llvm/trunk/include/llvm/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IntrinsicsX86.td?rev=99910&r1=99909&r2=99910&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IntrinsicsX86.td Tue Mar 30 13:49:01 2010
@@ -810,9 +810,6 @@
def int_x86_sse41_pmuldq : GCCBuiltin<"__builtin_ia32_pmuldq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem, Commutative]>;
- def int_x86_sse41_pmulld : GCCBuiltin<"__builtin_ia32_pmulld128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem, Commutative]>;
}
// Vector extract
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=99910&r1=99909&r2=99910&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Tue Mar 30 13:49:01 2010
@@ -597,7 +597,6 @@
{ X86::PMULHUWrr, X86::PMULHUWrm, 16 },
{ X86::PMULHWrr, X86::PMULHWrm, 16 },
{ X86::PMULLDrr, X86::PMULLDrm, 16 },
- { X86::PMULLDrr_int, X86::PMULLDrm_int, 16 },
{ X86::PMULLWrr, X86::PMULLWrm, 16 },
{ X86::PMULUDQrr, X86::PMULUDQrm, 16 },
{ X86::PORrr, X86::PORrm, 16 },
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=99910&r1=99909&r2=99910&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Mar 30 13:49:01 2010
@@ -3448,8 +3448,28 @@
OpSize;
}
}
-defm PMULLD : SS41I_binop_patint<0x40, "pmulld", v4i32, mul,
- int_x86_sse41_pmulld, 1>;
+
+/// SS48I_binop_rm - Simple SSE41 binary operator.
+let Constraints = "$src1 = $dst" in {
+multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, bit Commutable = 0> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
+ OpSize {
+ let isCommutable = Commutable;
+ }
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpNode VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2))))]>,
+ OpSize;
+}
+}
+
+defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, 1>;
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
let Constraints = "$src1 = $dst" in {
Modified: llvm/trunk/lib/VMCore/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/AutoUpgrade.cpp?rev=99910&r1=99909&r2=99910&view=diff
==============================================================================
--- llvm/trunk/lib/VMCore/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/VMCore/AutoUpgrade.cpp Tue Mar 30 13:49:01 2010
@@ -225,7 +225,12 @@
// Calls to these intrinsics are transformed into ShuffleVector's.
NewFn = 0;
return true;
+ } else if (Name.compare(5, 16, "x86.sse41.pmulld", 16) == 0) {
+ // Calls to this intrinsic are transformed into vector multiplies.
+ NewFn = 0;
+ return true;
}
+
break;
}
@@ -355,6 +360,18 @@
// Clean up the old call now that it has been completely upgraded.
CI->eraseFromParent();
+ } else if (F->getName() == "llvm.x86.sse41.pmulld") {
+ // Upgrade calls to this intrinsic into a vector multiply.
+ Instruction *Mul = BinaryOperator::CreateMul(CI->getOperand(1),
+ CI->getOperand(2),
+ CI->getName(),
+ CI);
+ // Fix up all the uses with our new multiply.
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(Mul);
+
+ // Remove the old intrinsic call now that it has been upgraded.
+ CI->eraseFromParent();
} else {
llvm_unreachable("Unknown function for CallInst upgrade.");
}
Added: llvm/trunk/test/Bitcode/sse41_pmulld.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Bitcode/sse41_pmulld.ll?rev=99910&view=auto
==============================================================================
--- llvm/trunk/test/Bitcode/sse41_pmulld.ll (added)
+++ llvm/trunk/test/Bitcode/sse41_pmulld.ll Tue Mar 30 13:49:01 2010
@@ -0,0 +1,2 @@
+; RUN: llvm-dis < %s.bc | not grep {i32 @llvm\\.pmulld}
+; RUN: llvm-dis < %s.bc | grep mul
\ No newline at end of file
Added: llvm/trunk/test/Bitcode/sse41_pmulld.ll.bc
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Bitcode/sse41_pmulld.ll.bc?rev=99910&view=auto
==============================================================================
Binary file - no diff available.
Propchange: llvm/trunk/test/Bitcode/sse41_pmulld.ll.bc
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Modified: llvm/trunk/test/CodeGen/X86/pmul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pmul.ll?rev=99910&r1=99909&r2=99910&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pmul.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pmul.ll Tue Mar 30 13:49:01 2010
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86 -mattr=sse41 -stack-alignment=16 > %t
; RUN: grep pmul %t | count 12
-; RUN: grep mov %t | count 12
+; RUN: grep mov %t | count 11
define <4 x i32> @a(<4 x i32> %i) nounwind {
%A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >
Added: llvm/trunk/test/CodeGen/X86/pmulld.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pmulld.ll?rev=99910&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pmulld.ll (added)
+++ llvm/trunk/test/CodeGen/X86/pmulld.ll Tue Mar 30 13:49:01 2010
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse41 -asm-verbose=0 | FileCheck %s
+
+define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
+; CHECK: test1:
+; CHECK-NEXT: pmulld
+ %C = mul <4 x i32> %A, %B
+ ret <4 x i32> %C
+}
+
+define <4 x i32> @test1a(<4 x i32> %A, <4 x i32> *%Bp) nounwind {
+; CHECK: test1a:
+; CHECK-NEXT: pmulld
+ %B = load <4 x i32>* %Bp
+ %C = mul <4 x i32> %A, %B
+ ret <4 x i32> %C
+}
More information about the llvm-commits mailing list