[llvm] r280837 - Don't reduce the width of vector mul if the target doesn't support SSE2.
Wei Mi via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 7 11:22:19 PDT 2016
Author: wmi
Date: Wed Sep 7 13:22:17 2016
New Revision: 280837
URL: http://llvm.org/viewvc/llvm-project?rev=280837&view=rev
Log:
Don't reduce the width of vector mul if the target doesn't support SSE2.
The patch is to fix PR30298, which is caused by rL272694. The solution is to
bail out if the target has no SSE2.
Differential Revision: https://reviews.llvm.org/D24288
Added:
llvm/trunk/test/CodeGen/X86/pr30298.ll
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=280837&r1=280836&r2=280837&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Sep 7 13:22:17 2016
@@ -28077,7 +28077,8 @@ static SDValue reduceVMULWidth(SDNode *N
const X86Subtarget &Subtarget) {
// pmulld is supported since SSE41. It is better to use pmulld
// instead of pmullw+pmulhw.
- if (Subtarget.hasSSE41())
+ // pmullw/pmulhw are not supported by SSE.
+ if (Subtarget.hasSSE41() || !Subtarget.hasSSE2())
return SDValue();
ShrinkMode Mode;
Added: llvm/trunk/test/CodeGen/X86/pr30298.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr30298.ll?rev=280837&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr30298.ll (added)
+++ llvm/trunk/test/CodeGen/X86/pr30298.ll Wed Sep 7 13:22:17 2016
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=i386-pc-linux-gnu -mattr=+sse < %s | FileCheck %s
+
+@c = external global i32*, align 8
+
+define void @mul_2xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) nounwind {
+; CHECK-LABEL: mul_2xi8:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: pushl %ebx
+; CHECK-NEXT: pushl %edi
+; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: movl c, %esi
+; CHECK-NEXT: movzbl 1(%edx,%ecx), %edi
+; CHECK-NEXT: movzbl (%edx,%ecx), %edx
+; CHECK-NEXT: movzbl 1(%eax,%ecx), %ebx
+; CHECK-NEXT: movzbl (%eax,%ecx), %eax
+; CHECK-NEXT: imull %edx, %eax
+; CHECK-NEXT: imull %edi, %ebx
+; CHECK-NEXT: movl %ebx, 4(%esi,%ecx,4)
+; CHECK-NEXT: movl %eax, (%esi,%ecx,4)
+; CHECK-NEXT: popl %esi
+; CHECK-NEXT: popl %edi
+; CHECK-NEXT: popl %ebx
+; CHECK-NEXT: retl
+entry:
+ %pre = load i32*, i32** @c
+ %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
+ %tmp7 = bitcast i8* %tmp6 to <2 x i8>*
+ %wide.load = load <2 x i8>, <2 x i8>* %tmp7, align 1
+ %tmp8 = zext <2 x i8> %wide.load to <2 x i32>
+ %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index
+ %tmp11 = bitcast i8* %tmp10 to <2 x i8>*
+ %wide.load17 = load <2 x i8>, <2 x i8>* %tmp11, align 1
+ %tmp12 = zext <2 x i8> %wide.load17 to <2 x i32>
+ %tmp13 = mul nuw nsw <2 x i32> %tmp12, %tmp8
+ %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
+ %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
+ store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4
+ ret void
+}
More information about the llvm-commits
mailing list