[PATCH] D32236: PR32710: Disable using PMADDWD for unsigned short.

Dehao Chen via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 19 11:41:45 PDT 2017


danielcdh created this revision.

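PMADDWD multiplies its 16-bit operands as signed values, so it cannot be used when the multiply operands are unsigned shorts (zero-extended i16). This patch makes the PMADDWD combine bail out when the detected shrink mode is MULU16.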


https://reviews.llvm.org/D32236

Files:
  lib/Target/X86/X86ISelLowering.cpp
  test/CodeGen/X86/madd.ll


Index: test/CodeGen/X86/madd.ll
===================================================================
--- test/CodeGen/X86/madd.ll
+++ test/CodeGen/X86/madd.ll
@@ -54,6 +54,48 @@
   ret i32 %13
 }
 
+;SSE2-label: @test_unsigned_short
+;SSE2-NOT: pmaddwd
+
+;AVX2-label: @test_unsigned_short
+;AVX2-NOT: vpmaddwd
+
+;AVX512-label: @test_unsigned_short
+;AVX512-NOT: vpmaddwd
+
+define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly, i32) local_unnamed_addr #0 { ; sums the products of two i16 arrays whose elements are zero-extended (unsigned) to i32
+entry:
+  %3 = zext i32 %2 to i64 ; widen the i32 argument; it is the loop bound compared against below
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ] ; element index, starts at 0
+  %vec.phi = phi <8 x i32> [ %11, %vector.body ], [ zeroinitializer, %entry ] ; 8-lane running sum, starts at zero
+  %4 = getelementptr inbounds i16, i16* %0, i64 %index
+  %5 = bitcast i16* %4 to <8 x i16>*
+  %wide.load = load <8 x i16>, <8 x i16>* %5, align 2 ; 8 elements from the first array
+  %6 = zext <8 x i16> %wide.load to <8 x i32> ; zero-extension is what makes the operands unsigned
+  %7 = getelementptr inbounds i16, i16* %1, i64 %index
+  %8 = bitcast i16* %7 to <8 x i16>*
+  %wide.load14 = load <8 x i16>, <8 x i16>* %8, align 2 ; 8 elements from the second array
+  %9 = zext <8 x i16> %wide.load14 to <8 x i32> ; second operand is zero-extended as well
+  %10 = mul nsw <8 x i32> %9, %6 ; multiply of two zero-extended values: the pattern that must not become (v)pmaddwd
+  %11 = add nsw <8 x i32> %10, %vec.phi ; accumulate the products into the running sum
+  %index.next = add i64 %index, 8 ; advance by the 8 elements handled per iteration
+  %12 = icmp eq i64 %index.next, %3 ; leave the loop once the index equals the widened bound
+  br i1 %12, label %middle.block, label %vector.body
+
+middle.block:
+  %rdx.shuf = shufflevector <8 x i32> %11, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> ; move lanes 4-7 down to lanes 0-3
+  %bin.rdx = add <8 x i32> %11, %rdx.shuf ; lanes 0-3 now hold lane i + lane i+4
+  %rdx.shuf15 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; move lanes 2-3 down to lanes 0-1
+  %bin.rdx16 = add <8 x i32> %bin.rdx, %rdx.shuf15 ; lanes 0-1 now hold the partial sums
+  %rdx.shuf17 = shufflevector <8 x i32> %bin.rdx16, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; move lane 1 down to lane 0
+  %bin.rdx18 = add <8 x i32> %bin.rdx16, %rdx.shuf17 ; lane 0 now holds the sum of all 8 lanes
+  %13 = extractelement <8 x i32> %bin.rdx18, i32 0 ; take the fully reduced scalar from lane 0
+  ret i32 %13
+}
+
 ;AVX2-label: @_Z9test_charPcS_i
 ;AVX2:       vpmovsxbw
 ;AVX2-NEXT:  vpmovsxbw
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -34631,7 +34631,7 @@
     return SDValue();
 
   ShrinkMode Mode;
-  if (!canReduceVMulWidth(MulOp.getNode(), DAG, Mode))
+  if (!canReduceVMulWidth(MulOp.getNode(), DAG, Mode) || Mode == MULU16)
     return SDValue();
 
   EVT VT = N->getValueType(0);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D32236.95792.patch
Type: text/x-patch
Size: 2534 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170419/c4a098e2/attachment.bin>


More information about the llvm-commits mailing list